1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 60 nmax = PetscMax(nmin,nmax); 61 allocated_S_St = PETSC_FALSE; 62 if (nmin) { 63 allocated_S_St = PETSC_TRUE; 64 } 65 66 /* allocate lapack workspace */ 67 cum = cum2 = 0; 68 maxneigs = 0; 69 for (i=0;i<sub_schurs->n_subs;i++) { 70 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 71 PetscInt n,subset_size; 72 73 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 74 n = PetscMin(subset_size,nmax); 75 cum += subset_size; 76 cum2 += subset_size*n; 77 maxneigs = PetscMax(maxneigs,n); 78 } 79 } 80 if (mss) { 81 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 82 PetscBLASInt B_itype = 1; 83 PetscBLASInt B_N = mss; 84 PetscReal zero = 0.0; 85 PetscReal eps = 0.0; /* dlamch? */ 86 87 B_lwork = -1; 88 S = NULL; 89 St = NULL; 90 eigs = NULL; 91 eigv = NULL; 92 B_iwork = NULL; 93 B_ifail = NULL; 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 PetscInt j,subset_size; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 158 const PetscInt *idxs; 159 PetscReal infty = PETSC_MAX_REAL; 160 PetscInt eigs_start = 0; 161 PetscBLASInt B_N; 162 PetscBool same_data = PETSC_FALSE; 163 164 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 165 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 166 if (sub_schurs->is_hermitian) { 167 PetscInt j,k; 168 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 169 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 170 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 171 } 172 for (j=0;j<subset_size;j++) { 173 for (k=j;k<subset_size;k++) { 174 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 175 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 176 } 177 } 178 } else { 179 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 180 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 181 } 182 } else { 183 S = Sarray + cumarray; 184 St = Starray + cumarray; 185 } 186 187 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 188 /* see if we can save some work */ 189 if (sub_schurs->n_subs == 1) { 190 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 191 } 192 193 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 194 B_neigs = 0; 195 } else { 196 /* Threshold: this is an heuristic for edges */ 197 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 198 199 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 200 PetscBLASInt B_itype = 1; 201 PetscBLASInt B_IL, B_IU; 202 PetscReal eps = -1.0; /* dlamch? */ 203 PetscInt nmin_s; 204 205 /* ask for eigenvalues larger than thresh */ 206 if (pcbddc->dbg_flag) { 207 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 208 } 209 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 210 #if defined(PETSC_USE_COMPLEX) 211 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 212 #else 213 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 214 #endif 215 ierr = PetscFPTrapPop();CHKERRQ(ierr); 216 if (B_ierr) { 217 if (B_ierr < 0 ) { 218 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 219 } else if (B_ierr <= B_N) { 220 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 221 } else { 222 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 223 } 224 } 225 226 if (B_neigs > nmax) { 227 if (pcbddc->dbg_flag) { 228 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 229 } 230 eigs_start = B_neigs -nmax; 231 B_neigs = nmax; 232 } 233 234 nmin_s = PetscMin(nmin,B_N); 235 if (B_neigs < nmin_s) { 236 PetscBLASInt B_neigs2; 237 238 B_IU = B_N - B_neigs; 239 B_IL = B_N - nmin_s + 1; 240 if (pcbddc->dbg_flag) { 241 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 242 } 243 if (sub_schurs->is_hermitian) { 244 PetscInt j; 245 for (j=0;j<subset_size;j++) { 246 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 247 } 248 for (j=0;j<subset_size;j++) { 249 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 250 } 251 } else { 252 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 254 } 255 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 256 #if defined(PETSC_USE_COMPLEX) 257 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 258 #else 259 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 260 #endif 261 ierr = PetscFPTrapPop();CHKERRQ(ierr); 262 B_neigs += B_neigs2; 263 } 264 if (B_ierr) { 265 if (B_ierr < 0 ) { 266 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 267 } else if (B_ierr <= B_N) { 268 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 269 } else { 270 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 271 } 272 } 273 if (pcbddc->dbg_flag) { 274 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 275 for (j=0;j<B_neigs;j++) { 276 if (eigs[j] == 0.0) { 277 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 278 } else { 279 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 280 } 281 } 282 } 283 } else { 284 /* TODO */ 285 } 286 } 287 maxneigs = PetscMax(B_neigs,maxneigs); 288 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 289 if (B_neigs) { 290 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 291 292 if (pcbddc->dbg_flag > 1) { 293 PetscInt ii; 294 for (ii=0;ii<B_neigs;ii++) { 295 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 296 for (j=0;j<B_N;j++) { 297 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 298 } 299 } 300 } 301 #if 0 302 for (j=0;j<B_neigs;j++) { 303 PetscBLASInt Blas_N,Blas_one = 1.0; 304 PetscScalar norm; 305 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 306 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 307 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 308 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 309 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 310 } else { 311 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 312 } 313 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 314 } 315 #endif 316 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 317 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 318 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 319 cum++; 320 } 321 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 322 } 323 /* shift for next computation */ 324 cumarray += subset_size*subset_size; 325 } 326 if (pcbddc->dbg_flag) { 327 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 328 } 329 330 if (mss) { 331 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 332 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 333 /* destroy matrices (junk) */ 334 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 335 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 336 } 337 if (allocated_S_St) { 338 ierr = PetscFree2(S,St);CHKERRQ(ierr); 339 } 340 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 341 #if defined(PETSC_USE_COMPLEX) 342 ierr = PetscFree(rwork);CHKERRQ(ierr); 343 #endif 344 if (pcbddc->dbg_flag) { 345 PetscInt maxneigs_r; 346 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 347 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 348 } 349 PetscFunctionReturn(0); 350 } 351 352 #undef __FUNCT__ 353 #define __FUNCT__ "PCBDDCSetUpSolvers" 354 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 355 { 356 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 357 PetscScalar *coarse_submat_vals; 358 PetscErrorCode ierr; 359 360 PetscFunctionBegin; 361 /* Setup local scatters R_to_B and (optionally) R_to_D */ 362 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 363 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 364 365 /* Setup local neumann solver ksp_R */ 366 /* PCBDDCSetUpLocalScatters should be called first! */ 367 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 368 369 /* Change global null space passed in by the user if change of basis has been requested */ 370 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 371 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 372 } 373 374 /* 375 Setup local correction and local part of coarse basis. 376 Gives back the dense local part of the coarse matrix in column major ordering 377 */ 378 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 379 380 /* Compute total number of coarse nodes and setup coarse solver */ 381 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 382 383 /* free */ 384 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCResetCustomization" 390 PetscErrorCode PCBDDCResetCustomization(PC pc) 391 { 392 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 397 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 398 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 399 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 401 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 402 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 403 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 405 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 406 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 407 PetscFunctionReturn(0); 408 } 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "PCBDDCResetTopography" 412 PetscErrorCode PCBDDCResetTopography(PC pc) 413 { 414 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 419 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 421 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 422 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "PCBDDCResetSolvers" 428 PetscErrorCode PCBDDCResetSolvers(PC pc) 429 { 430 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 431 PetscScalar *array; 432 PetscErrorCode ierr; 433 434 PetscFunctionBegin; 435 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 436 if (pcbddc->coarse_phi_B) { 437 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 438 ierr = PetscFree(array);CHKERRQ(ierr); 439 } 440 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 441 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 442 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 443 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 444 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 445 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 446 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 447 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 448 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 449 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 450 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 451 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 452 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 453 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 454 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 455 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 456 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 457 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 458 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 459 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 460 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 461 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 462 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 468 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 469 { 470 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 471 PC_IS *pcis = (PC_IS*)pc->data; 472 VecType impVecType; 473 PetscInt n_constraints,n_R,old_size; 474 PetscErrorCode ierr; 475 476 PetscFunctionBegin; 477 if (!pcbddc->ConstraintMatrix) { 478 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 479 } 480 /* get sizes */ 481 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 482 n_R = pcis->n-pcbddc->n_vertices; 483 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 484 /* local work vectors (try to avoid unneeded work)*/ 485 /* R nodes */ 486 old_size = -1; 487 if (pcbddc->vec1_R) { 488 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 489 } 490 if (n_R != old_size) { 491 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 492 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 493 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 494 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 495 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 496 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 497 } 498 /* local primal dofs */ 499 old_size = -1; 500 if (pcbddc->vec1_P) { 501 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 502 } 503 if (pcbddc->local_primal_size != old_size) { 504 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 505 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 506 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 507 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 508 } 509 /* local explicit constraints */ 510 old_size = -1; 511 if (pcbddc->vec1_C) { 512 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 513 } 514 if (n_constraints && n_constraints != old_size) { 515 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 516 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 517 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 518 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 519 } 520 PetscFunctionReturn(0); 521 } 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "PCBDDCSetUpCorrection" 525 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 526 { 527 PetscErrorCode ierr; 528 /* pointers to pcis and pcbddc */ 529 PC_IS* pcis = (PC_IS*)pc->data; 530 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 531 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 532 /* submatrices of local problem */ 533 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 534 /* submatrices of local coarse problem */ 535 Mat S_VV,S_CV,S_VC,S_CC; 536 /* working matrices */ 537 Mat C_CR; 538 /* additional working stuff */ 539 PC pc_R; 540 Mat F; 541 PetscBool isLU,isCHOL,isILU; 542 543 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 544 PetscScalar *work; 545 PetscInt *idx_V_B; 546 PetscInt n,n_vertices,n_constraints; 547 PetscInt i,n_R,n_D,n_B; 548 PetscBool unsymmetric_check; 549 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 550 MatType impMatType; 551 /* some shortcuts to scalars */ 552 PetscScalar one=1.0,m_one=-1.0; 553 554 PetscFunctionBegin; 555 n_vertices = pcbddc->n_vertices; 556 n_constraints = pcbddc->local_primal_size-n_vertices; 557 /* Set Non-overlapping dimensions */ 558 n_B = pcis->n_B; 559 n_D = pcis->n - n_B; 560 n_R = pcis->n - n_vertices; 561 562 /* Set types for local objects needed by BDDC precondtioner */ 563 impMatType = MATSEQDENSE; 564 565 /* vertices in boundary numbering */ 566 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 567 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 568 if (i != n_vertices) { 569 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 570 } 571 572 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 573 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 574 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 575 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 576 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 577 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 578 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 579 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 580 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 581 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 582 583 unsymmetric_check = PETSC_FALSE; 584 /* allocate workspace */ 585 n = 0; 586 if (n_constraints) { 587 n += n_R*n_constraints; 588 } 589 if (n_vertices) { 590 n = PetscMax(2*n_R*n_vertices,n); 591 n = PetscMax((n_R+n_B)*n_vertices,n); 592 } 593 if (!pcbddc->symmetric_primal) { 594 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 595 unsymmetric_check = PETSC_TRUE; 596 } 597 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 598 599 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 600 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 601 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 602 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 603 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 604 if (isLU || isILU || isCHOL) { 605 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 606 } else if (sub_schurs->reuse_mumps) { 607 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 608 MatFactorType type; 609 610 F = reuse_mumps->F; 611 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 612 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 613 } else { 614 F = NULL; 615 } 616 617 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 618 if (n_constraints) { 619 Mat M1,M2,M3; 620 Mat auxmat; 621 IS is_aux; 622 PetscScalar *array,*array2; 623 624 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 625 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 626 627 /* Extract constraints on R nodes: C_{CR} */ 628 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 629 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 630 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 631 632 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 633 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 634 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 635 for (i=0;i<n_constraints;i++) { 636 const PetscScalar *row_cmat_values; 637 const PetscInt *row_cmat_indices; 638 PetscInt size_of_constraint,j; 639 640 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 641 for (j=0;j<size_of_constraint;j++) { 642 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 643 } 644 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 645 } 646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 647 if (F) { 648 Mat B; 649 650 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 651 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 652 ierr = MatDestroy(&B);CHKERRQ(ierr); 653 } else { 654 PetscScalar *marr; 655 656 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 657 for (i=0;i<n_constraints;i++) { 658 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 659 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 660 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 661 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 662 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 663 } 664 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 665 } 666 if (!pcbddc->switch_static) { 667 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 668 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 669 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 670 for (i=0;i<n_constraints;i++) { 671 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 672 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 673 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 674 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 675 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 676 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 677 } 678 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 679 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 680 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 681 } else { 682 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 683 pcbddc->local_auxmat2 = local_auxmat2_R; 684 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 685 } 686 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 687 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 688 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 689 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 690 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 691 if (isCHOL) { 692 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 693 } else { 694 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 695 } 696 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 697 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 698 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 699 ierr = MatDestroy(&M2);CHKERRQ(ierr); 700 ierr = MatDestroy(&M3);CHKERRQ(ierr); 701 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 702 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 703 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 704 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 705 ierr = MatDestroy(&M1);CHKERRQ(ierr); 706 } 707 /* Get submatrices from subdomain matrix */ 708 if (n_vertices) { 709 Mat newmat; 710 IS is_aux; 711 PetscInt ibs,mbs; 712 PetscBool issbaij; 713 714 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 715 IS tis; 716 717 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 718 ierr = ISSort(tis);CHKERRQ(ierr); 719 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 720 ierr = ISDestroy(&tis);CHKERRQ(ierr); 721 } else { 722 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 723 } 724 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 725 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 726 if (ibs != mbs) { /* need to convert to SEQAIJ */ 727 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 728 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 729 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 730 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 731 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 732 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 733 } else { 734 /* this is safe */ 735 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 736 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 737 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 738 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 739 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 740 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 741 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 742 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 743 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 744 } else { 745 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 746 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 747 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 748 } 749 /* need to convert to SeqAIJ for MatMatMult with dense matrix */ 750 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 751 } 752 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 753 } 754 755 /* Matrix of coarse basis functions (local) */ 756 if (pcbddc->coarse_phi_B) { 757 PetscInt on_B,on_primal,on_D=n_D; 758 if (pcbddc->coarse_phi_D) { 759 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 760 } 761 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 762 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 763 PetscScalar *marray; 764 765 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 766 ierr = PetscFree(marray);CHKERRQ(ierr); 767 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 768 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 769 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 770 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 771 } 772 } 773 774 if (!pcbddc->coarse_phi_B) { 775 PetscScalar *marray; 776 777 n = n_B*pcbddc->local_primal_size; 778 if (pcbddc->switch_static || pcbddc->dbg_flag) { 779 n += n_D*pcbddc->local_primal_size; 780 } 781 if (!pcbddc->symmetric_primal) { 782 n *= 2; 783 } 784 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 785 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 786 n = n_B*pcbddc->local_primal_size; 787 if (pcbddc->switch_static || pcbddc->dbg_flag) { 788 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 789 n += n_D*pcbddc->local_primal_size; 790 } 791 if (!pcbddc->symmetric_primal) { 792 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 793 if (pcbddc->switch_static || pcbddc->dbg_flag) { 794 n = n_B*pcbddc->local_primal_size; 795 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 796 } 797 } else { 798 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 799 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 800 if (pcbddc->switch_static || pcbddc->dbg_flag) { 801 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 802 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 803 } 804 } 805 } 806 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 807 /* vertices */ 808 if (n_vertices) { 809 810 if (n_R) { 811 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 812 PetscBLASInt B_N,B_one = 1; 813 PetscScalar *x,*y; 814 815 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 816 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 817 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 818 if (F) { /* TODO could be optimized for symmetric problems */ 819 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 820 } else { 821 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 822 for (i=0;i<n_vertices;i++) { 823 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 824 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 825 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 826 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 827 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 828 } 829 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 830 } 831 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 832 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 833 if (n_constraints) { 834 Mat B; 835 836 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 837 for (i=0;i<n_vertices;i++) { 838 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 839 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 840 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 841 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 843 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 844 } 845 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 846 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 847 ierr = MatDestroy(&B);CHKERRQ(ierr); 848 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 849 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 850 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 851 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 852 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 853 ierr = MatDestroy(&B);CHKERRQ(ierr); 854 } 855 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 856 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 857 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 858 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 859 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 860 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 861 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 862 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 863 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 864 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 865 } else { 866 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 867 } 868 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 869 /* coarse basis functions */ 870 for (i=0;i<n_vertices;i++) { 871 PetscScalar *y; 872 873 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 874 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 875 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 876 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 877 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 878 y[n_B*i+idx_V_B[i]] = 1.0; 879 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 880 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 881 882 if (pcbddc->switch_static || pcbddc->dbg_flag) { 883 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 884 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 885 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 888 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 889 } 890 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 891 } 892 } 893 894 if (n_constraints) { 895 Mat B; 896 897 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 898 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 899 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 900 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 901 if (n_vertices) { 902 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 903 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 904 } else { 905 Mat S_VCt; 906 907 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 908 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 909 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 910 } 911 } 912 ierr = MatDestroy(&B);CHKERRQ(ierr); 913 /* coarse basis functions */ 914 for (i=0;i<n_constraints;i++) { 915 PetscScalar *y; 916 917 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 918 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 919 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 920 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 921 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 923 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 924 if (pcbddc->switch_static || pcbddc->dbg_flag) { 925 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 926 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 927 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 929 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 930 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 931 } 932 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 933 } 934 } 935 if (n_constraints) { 936 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 937 } 938 939 /* compute other basis functions for non-symmetric problems */ 940 if (!pcbddc->symmetric_primal) { 941 942 if (n_constraints) { 943 Mat S_CCT,B_C; 944 945 /* this is a lazy thing */ 946 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 947 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 948 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 949 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 950 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 951 if (n_vertices) { 952 Mat B_V,S_VCT; 953 954 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 955 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 956 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 957 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 958 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 959 } 960 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 961 } else { /* if there are no constraints, reset work */ 962 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 963 } 964 if (n_vertices && n_R) { 965 Mat A_VRT; 966 PetscScalar *marray; 967 PetscBLASInt B_N,B_one = 1; 968 969 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 970 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 971 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 972 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 973 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 974 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 975 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 976 } 977 978 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 979 for (i=0;i<pcbddc->local_primal_size;i++) { 980 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 981 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 982 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 983 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 984 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 985 } 986 } else { 987 for (i=0;i<pcbddc->local_primal_size;i++) { 988 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 989 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 990 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 991 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 992 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 993 } 994 } 995 /* coarse basis functions */ 996 for (i=0;i<pcbddc->local_primal_size;i++) { 997 PetscScalar *y; 998 999 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1000 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1001 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1002 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1003 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004 if (i<n_vertices) { 1005 y[n_B*i+idx_V_B[i]] = 1.0; 1006 } 1007 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1008 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1009 1010 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1011 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1012 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1013 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1015 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1016 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1017 } 1018 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1019 } 1020 } 1021 /* free memory */ 1022 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1023 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1024 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1025 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1026 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1027 ierr = PetscFree(work);CHKERRQ(ierr); 1028 if (n_vertices) { 1029 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1030 } 1031 if (n_constraints) { 1032 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1033 } 1034 /* Checking coarse_sub_mat and coarse basis functios */ 1035 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1036 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1037 if (pcbddc->dbg_flag) { 1038 Mat coarse_sub_mat; 1039 Mat AUXMAT,TM1,TM2,TM3,TM4; 1040 Mat coarse_phi_D,coarse_phi_B; 1041 Mat coarse_psi_D,coarse_psi_B; 1042 Mat A_II,A_BB,A_IB,A_BI; 1043 Mat C_B,CPHI; 1044 IS is_dummy; 1045 Vec mones; 1046 MatType checkmattype=MATSEQAIJ; 1047 PetscReal real_value; 1048 1049 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1050 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1051 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1052 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1053 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1054 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1055 if (unsymmetric_check) { 1056 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1057 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1058 } 1059 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1060 1061 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1062 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1063 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1064 if (unsymmetric_check) { 1065 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1066 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1067 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1069 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1070 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1071 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1072 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1073 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1074 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1075 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1076 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1077 } else { 1078 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1079 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1080 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1081 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1082 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1083 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1084 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1085 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1086 } 1087 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1088 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1089 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1090 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1091 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1092 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1093 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1095 1096 /* check constraints */ 1097 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1098 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1099 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1100 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1101 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1102 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1103 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1105 if (unsymmetric_check) { 1106 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1107 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1108 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1109 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1110 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1111 } 1112 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1113 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1114 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1115 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1116 1117 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1118 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1119 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1120 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1121 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1122 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1123 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1124 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1125 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1126 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1127 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1128 if (unsymmetric_check) { 1129 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1130 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1131 } 1132 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1133 } 1134 /* get back data */ 1135 *coarse_submat_vals_n = coarse_submat_vals; 1136 PetscFunctionReturn(0); 1137 } 1138 1139 #undef __FUNCT__ 1140 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1141 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1142 { 1143 Mat *work_mat; 1144 IS isrow_s,iscol_s; 1145 PetscBool rsorted,csorted; 1146 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1147 PetscErrorCode ierr; 1148 1149 PetscFunctionBegin; 1150 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1151 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1152 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1153 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1154 1155 if (!rsorted) { 1156 const PetscInt *idxs; 1157 PetscInt *idxs_sorted,i; 1158 1159 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1160 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1161 for (i=0;i<rsize;i++) { 1162 idxs_perm_r[i] = i; 1163 } 1164 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1165 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1166 for (i=0;i<rsize;i++) { 1167 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1168 } 1169 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1170 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1171 } else { 1172 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1173 isrow_s = isrow; 1174 } 1175 1176 if (!csorted) { 1177 if (isrow == iscol) { 1178 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1179 iscol_s = isrow_s; 1180 } else { 1181 const PetscInt *idxs; 1182 PetscInt *idxs_sorted,i; 1183 1184 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1185 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1186 for (i=0;i<csize;i++) { 1187 idxs_perm_c[i] = i; 1188 } 1189 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1190 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1191 for (i=0;i<csize;i++) { 1192 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1193 } 1194 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1195 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1196 } 1197 } else { 1198 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1199 iscol_s = iscol; 1200 } 1201 1202 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1203 1204 if (!rsorted || !csorted) { 1205 Mat new_mat; 1206 IS is_perm_r,is_perm_c; 1207 1208 if (!rsorted) { 1209 PetscInt *idxs_r,i; 1210 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1211 for (i=0;i<rsize;i++) { 1212 idxs_r[idxs_perm_r[i]] = i; 1213 } 1214 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1215 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1216 } else { 1217 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1218 } 1219 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1220 1221 if (!csorted) { 1222 if (isrow_s == iscol_s) { 1223 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1224 is_perm_c = is_perm_r; 1225 } else { 1226 PetscInt *idxs_c,i; 1227 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1228 for (i=0;i<csize;i++) { 1229 idxs_c[idxs_perm_c[i]] = i; 1230 } 1231 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1232 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1233 } 1234 } else { 1235 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1236 } 1237 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1238 1239 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1240 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1241 work_mat[0] = new_mat; 1242 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1243 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1244 } 1245 1246 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1247 *B = work_mat[0]; 1248 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1249 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1250 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1256 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1257 { 1258 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1259 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1260 Mat new_mat; 1261 IS is_local,is_global; 1262 PetscInt local_size; 1263 PetscBool isseqaij; 1264 PetscErrorCode ierr; 1265 1266 PetscFunctionBegin; 1267 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1268 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1269 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1270 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1271 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1272 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1273 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1274 1275 /* check */ 1276 if (pcbddc->dbg_flag) { 1277 Vec x,x_change; 1278 PetscReal error; 1279 1280 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1281 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1282 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1283 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1284 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1285 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1286 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1287 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1288 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1289 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1290 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1291 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1292 ierr = VecDestroy(&x);CHKERRQ(ierr); 1293 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1294 } 1295 1296 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1297 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1298 if (isseqaij) { 1299 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1300 } else { 1301 Mat work_mat; 1302 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1303 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1304 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1305 } 1306 if (matis->A->symmetric_set) { 1307 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1308 #if !defined(PETSC_USE_COMPLEX) 1309 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1310 #endif 1311 } 1312 /* 1313 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1314 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1315 */ 1316 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1322 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1323 { 1324 PC_IS* pcis = (PC_IS*)(pc->data); 1325 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1326 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1327 PetscInt *idx_R_local=NULL; 1328 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1329 PetscInt vbs,bs; 1330 PetscBT bitmask=NULL; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 /* 1335 No need to setup local scatters if 1336 - primal space is unchanged 1337 AND 1338 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1339 AND 1340 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1341 */ 1342 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1343 PetscFunctionReturn(0); 1344 } 1345 /* destroy old objects */ 1346 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1347 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1348 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1349 /* Set Non-overlapping dimensions */ 1350 n_B = pcis->n_B; 1351 n_D = pcis->n - n_B; 1352 n_vertices = pcbddc->n_vertices; 1353 1354 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1355 1356 /* create auxiliary bitmask and allocate workspace */ 1357 if (!sub_schurs->reuse_mumps) { 1358 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1359 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1360 for (i=0;i<n_vertices;i++) { 1361 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1362 } 1363 1364 for (i=0, n_R=0; i<pcis->n; i++) { 1365 if (!PetscBTLookup(bitmask,i)) { 1366 idx_R_local[n_R++] = i; 1367 } 1368 } 1369 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1370 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1371 1372 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1373 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1374 } 1375 1376 /* Block code */ 1377 vbs = 1; 1378 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1379 if (bs>1 && !(n_vertices%bs)) { 1380 PetscBool is_blocked = PETSC_TRUE; 1381 PetscInt *vary; 1382 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1383 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1384 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1385 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1386 for (i=0; i<n_vertices/bs; i++) { 1387 if (vary[i]!=0 && vary[i]!=bs) { 1388 is_blocked = PETSC_FALSE; 1389 break; 1390 } 1391 } 1392 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1393 vbs = bs; 1394 for (i=0;i<n_R/vbs;i++) { 1395 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1396 } 1397 } 1398 ierr = PetscFree(vary);CHKERRQ(ierr); 1399 } 1400 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1401 if (sub_schurs->reuse_mumps) { 1402 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1403 1404 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1405 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1406 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1407 reuse_mumps->is_R = pcbddc->is_R_local; 1408 } else { 1409 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1410 } 1411 1412 /* print some info if requested */ 1413 if (pcbddc->dbg_flag) { 1414 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1415 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1416 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1418 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1420 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1421 } 1422 1423 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1424 if (!sub_schurs->reuse_mumps) { 1425 IS is_aux1,is_aux2; 1426 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1427 1428 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1429 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1430 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1431 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1432 for (i=0; i<n_D; i++) { 1433 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1434 } 1435 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1436 for (i=0, j=0; i<n_R; i++) { 1437 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1438 aux_array1[j++] = i; 1439 } 1440 } 1441 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1442 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1443 for (i=0, j=0; i<n_B; i++) { 1444 if (!PetscBTLookup(bitmask,is_indices[i])) { 1445 aux_array2[j++] = i; 1446 } 1447 } 1448 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1449 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1450 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1451 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1452 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1453 1454 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1455 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1456 for (i=0, j=0; i<n_R; i++) { 1457 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1458 aux_array1[j++] = i; 1459 } 1460 } 1461 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1462 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1463 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1464 } 1465 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1466 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1467 } else { 1468 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1469 IS tis; 1470 PetscInt schur_size; 1471 1472 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1473 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1474 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1475 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1476 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1477 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1478 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1479 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1480 } 1481 } 1482 PetscFunctionReturn(0); 1483 } 1484 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1488 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1489 { 1490 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1491 PC_IS *pcis = (PC_IS*)pc->data; 1492 PC pc_temp; 1493 Mat A_RR; 1494 MatReuse reuse; 1495 PetscScalar m_one = -1.0; 1496 PetscReal value; 1497 PetscInt n_D,n_R,ibs,mbs; 1498 PetscBool use_exact,use_exact_reduced,issbaij; 1499 PetscErrorCode ierr; 1500 /* prefixes stuff */ 1501 char dir_prefix[256],neu_prefix[256],str_level[16]; 1502 size_t len; 1503 1504 PetscFunctionBegin; 1505 1506 /* compute prefixes */ 1507 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1508 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1509 if (!pcbddc->current_level) { 1510 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1511 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1512 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1513 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1514 } else { 1515 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1516 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1517 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1518 len -= 15; /* remove "pc_bddc_coarse_" */ 1519 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1520 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1521 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1522 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1523 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1524 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1525 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1526 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1527 } 1528 1529 /* DIRICHLET PROBLEM */ 1530 if (dirichlet) { 1531 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1532 if (pcbddc->local_mat->symmetric_set) { 1533 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1534 } 1535 /* Matrix for Dirichlet problem is pcis->A_II */ 1536 n_D = pcis->n - pcis->n_B; 1537 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1538 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1539 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1540 /* default */ 1541 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1542 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1543 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1544 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1545 if (issbaij) { 1546 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1547 } else { 1548 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1549 } 1550 /* Allow user's customization */ 1551 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1552 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1553 } 1554 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1555 if (sub_schurs->reuse_mumps) { 1556 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1557 1558 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1559 } 1560 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1561 if (!n_D) { 1562 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1563 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1564 } 1565 /* Set Up KSP for Dirichlet problem of BDDC */ 1566 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1567 /* set ksp_D into pcis data */ 1568 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1569 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1570 pcis->ksp_D = pcbddc->ksp_D; 1571 } 1572 1573 /* NEUMANN PROBLEM */ 1574 A_RR = 0; 1575 if (neumann) { 1576 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1577 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1578 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1579 if (pcbddc->ksp_R) { /* already created ksp */ 1580 PetscInt nn_R; 1581 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1582 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1583 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1584 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1585 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1586 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1587 reuse = MAT_INITIAL_MATRIX; 1588 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1589 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1590 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1591 reuse = MAT_INITIAL_MATRIX; 1592 } else { /* safe to reuse the matrix */ 1593 reuse = MAT_REUSE_MATRIX; 1594 } 1595 } 1596 /* last check */ 1597 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1598 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1599 reuse = MAT_INITIAL_MATRIX; 1600 } 1601 } else { /* first time, so we need to create the matrix */ 1602 reuse = MAT_INITIAL_MATRIX; 1603 } 1604 /* extract A_RR */ 1605 if (!sub_schurs->reuse_mumps) { 1606 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1607 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1608 if (ibs != mbs) { 1609 Mat newmat; 1610 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1611 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1612 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1613 } else { 1614 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1615 } 1616 if (pcbddc->local_mat->symmetric_set) { 1617 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1618 } 1619 } else { 1620 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1621 1622 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1623 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1624 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1625 } 1626 if (!pcbddc->ksp_R) { /* create object if not present */ 1627 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1628 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1629 /* default */ 1630 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1631 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1632 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1633 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1634 if (issbaij) { 1635 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1636 } else { 1637 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1638 } 1639 /* Allow user's customization */ 1640 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1641 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1642 } 1643 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1644 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1645 if (!n_R) { 1646 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1647 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1648 } 1649 /* Reuse MUMPS solver if it is present */ 1650 if (sub_schurs->reuse_mumps) { 1651 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1652 1653 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1654 } 1655 /* Set Up KSP for Neumann problem of BDDC */ 1656 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1657 } 1658 /* free Neumann problem's matrix */ 1659 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1660 1661 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1662 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1663 if (pcbddc->dbg_flag) { 1664 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1665 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1666 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1667 } 1668 if (dirichlet) { /* Dirichlet */ 1669 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1670 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1671 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1672 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1673 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1674 /* need to be adapted? */ 1675 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1676 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1677 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1678 /* print info */ 1679 if (pcbddc->dbg_flag) { 1680 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1681 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1682 } 1683 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1684 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1685 } 1686 } 1687 if (neumann) { /* Neumann */ 1688 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1689 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1690 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1691 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1692 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1693 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1694 /* need to be adapted? */ 1695 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1696 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1697 /* print info */ 1698 if (pcbddc->dbg_flag) { 1699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1700 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1701 } 1702 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1703 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1704 } 1705 } 1706 } 1707 PetscFunctionReturn(0); 1708 } 1709 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1712 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1713 { 1714 PetscErrorCode ierr; 1715 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1716 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1717 1718 PetscFunctionBegin; 1719 if (!sub_schurs->reuse_mumps) { 1720 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1721 } 1722 if (!pcbddc->switch_static) { 1723 if (applytranspose && pcbddc->local_auxmat1) { 1724 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1725 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1726 } 1727 if (!sub_schurs->reuse_mumps) { 1728 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1729 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1730 } else { 1731 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1732 1733 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1734 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1735 } 1736 } else { 1737 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1738 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1739 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1740 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1741 if (applytranspose && pcbddc->local_auxmat1) { 1742 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1743 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1744 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1745 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1746 } 1747 } 1748 if (!sub_schurs->reuse_mumps) { 1749 if (applytranspose) { 1750 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1751 } else { 1752 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1753 } 1754 #if defined(PETSC_HAVE_MUMPS) 1755 } else { 1756 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1757 1758 if (applytranspose) { 1759 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1760 } else { 1761 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1762 } 1763 #endif 1764 } 1765 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1766 if (!pcbddc->switch_static) { 1767 if (!sub_schurs->reuse_mumps) { 1768 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1769 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1770 } else { 1771 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1772 1773 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1774 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1775 } 1776 if (!applytranspose && pcbddc->local_auxmat1) { 1777 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1778 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1779 } 1780 } else { 1781 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1782 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1783 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1784 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1785 if (!applytranspose && pcbddc->local_auxmat1) { 1786 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1787 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1788 } 1789 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1790 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1791 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1792 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 } 1794 PetscFunctionReturn(0); 1795 } 1796 1797 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1798 #undef __FUNCT__ 1799 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1800 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1801 { 1802 PetscErrorCode ierr; 1803 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1804 PC_IS* pcis = (PC_IS*) (pc->data); 1805 const PetscScalar zero = 0.0; 1806 1807 PetscFunctionBegin; 1808 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1809 if (applytranspose) { 1810 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1811 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1812 } else { 1813 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1814 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1815 } 1816 /* start communications from local primal nodes to rhs of coarse solver */ 1817 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1818 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1819 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1820 1821 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1822 /* TODO remove null space when doing multilevel */ 1823 if (pcbddc->coarse_ksp) { 1824 Vec rhs,sol; 1825 1826 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1827 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1828 if (applytranspose) { 1829 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1830 } else { 1831 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1832 } 1833 } 1834 1835 /* Local solution on R nodes */ 1836 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1837 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1838 } 1839 1840 /* communications from coarse sol to local primal nodes */ 1841 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1842 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1843 1844 /* Sum contributions from two levels */ 1845 if (applytranspose) { 1846 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1847 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1848 } else { 1849 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1850 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1851 } 1852 PetscFunctionReturn(0); 1853 } 1854 1855 #undef __FUNCT__ 1856 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1857 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1858 { 1859 PetscErrorCode ierr; 1860 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1861 PetscScalar *array; 1862 Vec from,to; 1863 1864 PetscFunctionBegin; 1865 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1866 from = pcbddc->coarse_vec; 1867 to = pcbddc->vec1_P; 1868 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1869 Vec tvec; 1870 1871 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1872 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1873 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1874 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1875 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1876 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1877 } 1878 } else { /* from local to global -> put data in coarse right hand side */ 1879 from = pcbddc->vec1_P; 1880 to = pcbddc->coarse_vec; 1881 } 1882 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1888 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1889 { 1890 PetscErrorCode ierr; 1891 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1892 PetscScalar *array; 1893 Vec from,to; 1894 1895 PetscFunctionBegin; 1896 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1897 from = pcbddc->coarse_vec; 1898 to = pcbddc->vec1_P; 1899 } else { /* from local to global -> put data in coarse right hand side */ 1900 from = pcbddc->vec1_P; 1901 to = pcbddc->coarse_vec; 1902 } 1903 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1904 if (smode == SCATTER_FORWARD) { 1905 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1906 Vec tvec; 1907 1908 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1909 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1910 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1911 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1912 } 1913 } else { 1914 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1915 ierr = VecResetArray(from);CHKERRQ(ierr); 1916 } 1917 } 1918 PetscFunctionReturn(0); 1919 } 1920 1921 /* uncomment for testing purposes */ 1922 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1923 #undef __FUNCT__ 1924 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1925 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1926 { 1927 PetscErrorCode ierr; 1928 PC_IS* pcis = (PC_IS*)(pc->data); 1929 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1930 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1931 /* one and zero */ 1932 PetscScalar one=1.0,zero=0.0; 1933 /* space to store constraints and their local indices */ 1934 PetscScalar *constraints_data; 1935 PetscInt *constraints_idxs,*constraints_idxs_B; 1936 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1937 PetscInt *constraints_n; 1938 /* iterators */ 1939 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1940 /* BLAS integers */ 1941 PetscBLASInt lwork,lierr; 1942 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1943 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1944 /* reuse */ 1945 PetscInt olocal_primal_size,olocal_primal_size_cc; 1946 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1947 /* change of basis */ 1948 PetscBool qr_needed; 1949 PetscBT change_basis,qr_needed_idx; 1950 /* auxiliary stuff */ 1951 PetscInt *nnz,*is_indices; 1952 PetscInt ncc; 1953 /* some quantities */ 1954 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1955 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1956 1957 PetscFunctionBegin; 1958 /* Destroy Mat objects computed previously */ 1959 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1960 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1961 /* save info on constraints from previous setup (if any) */ 1962 olocal_primal_size = pcbddc->local_primal_size; 1963 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1964 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1965 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1966 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1967 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1968 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1969 1970 /* print some info */ 1971 if (pcbddc->dbg_flag) { 1972 IS vertices; 1973 PetscInt nv,nedges,nfaces; 1974 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1975 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1976 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1977 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1978 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1979 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1980 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1981 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1982 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1983 } 1984 1985 if (!pcbddc->adaptive_selection) { 1986 IS ISForVertices,*ISForFaces,*ISForEdges; 1987 MatNullSpace nearnullsp; 1988 const Vec *nearnullvecs; 1989 Vec *localnearnullsp; 1990 PetscScalar *array; 1991 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1992 PetscBool nnsp_has_cnst; 1993 /* LAPACK working arrays for SVD or POD */ 1994 PetscBool skip_lapack,boolforchange; 1995 PetscScalar *work; 1996 PetscReal *singular_vals; 1997 #if defined(PETSC_USE_COMPLEX) 1998 PetscReal *rwork; 1999 #endif 2000 #if defined(PETSC_MISSING_LAPACK_GESVD) 2001 PetscScalar *temp_basis,*correlation_mat; 2002 #else 2003 PetscBLASInt dummy_int=1; 2004 PetscScalar dummy_scalar=1.; 2005 #endif 2006 2007 /* Get index sets for faces, edges and vertices from graph */ 2008 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2009 /* free unneeded index sets */ 2010 if (!pcbddc->use_vertices) { 2011 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2012 } 2013 if (!pcbddc->use_edges) { 2014 for (i=0;i<n_ISForEdges;i++) { 2015 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2016 } 2017 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2018 n_ISForEdges = 0; 2019 } 2020 if (!pcbddc->use_faces) { 2021 for (i=0;i<n_ISForFaces;i++) { 2022 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2023 } 2024 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2025 n_ISForFaces = 0; 2026 } 2027 2028 #if defined(PETSC_USE_DEBUG) 2029 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2030 Also use_change_of_basis should be consistent among processors */ 2031 if (pcbddc->NullSpace) { 2032 PetscBool tbool[2],gbool[2]; 2033 2034 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2035 pcbddc->use_change_of_basis = PETSC_TRUE; 2036 if (!ISForEdges) { 2037 pcbddc->use_change_on_faces = PETSC_TRUE; 2038 } 2039 } 2040 tbool[0] = pcbddc->use_change_of_basis; 2041 tbool[1] = pcbddc->use_change_on_faces; 2042 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2043 pcbddc->use_change_of_basis = gbool[0]; 2044 pcbddc->use_change_on_faces = gbool[1]; 2045 } 2046 #endif 2047 2048 /* check if near null space is attached to global mat */ 2049 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2050 if (nearnullsp) { 2051 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2052 /* remove any stored info */ 2053 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2054 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2055 /* store information for BDDC solver reuse */ 2056 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2057 pcbddc->onearnullspace = nearnullsp; 2058 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2059 for (i=0;i<nnsp_size;i++) { 2060 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2061 } 2062 } else { /* if near null space is not provided BDDC uses constants by default */ 2063 nnsp_size = 0; 2064 nnsp_has_cnst = PETSC_TRUE; 2065 } 2066 /* get max number of constraints on a single cc */ 2067 max_constraints = nnsp_size; 2068 if (nnsp_has_cnst) max_constraints++; 2069 2070 /* 2071 Evaluate maximum storage size needed by the procedure 2072 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2073 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2074 There can be multiple constraints per connected component 2075 */ 2076 n_vertices = 0; 2077 if (ISForVertices) { 2078 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2079 } 2080 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2081 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2082 2083 total_counts = n_ISForFaces+n_ISForEdges; 2084 total_counts *= max_constraints; 2085 total_counts += n_vertices; 2086 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2087 2088 total_counts = 0; 2089 max_size_of_constraint = 0; 2090 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2091 IS used_is; 2092 if (i<n_ISForEdges) { 2093 used_is = ISForEdges[i]; 2094 } else { 2095 used_is = ISForFaces[i-n_ISForEdges]; 2096 } 2097 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2098 total_counts += j; 2099 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2100 } 2101 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2102 2103 /* get local part of global near null space vectors */ 2104 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2105 for (k=0;k<nnsp_size;k++) { 2106 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2107 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2108 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2109 } 2110 2111 /* whether or not to skip lapack calls */ 2112 skip_lapack = PETSC_TRUE; 2113 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2114 2115 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2116 if (!skip_lapack) { 2117 PetscScalar temp_work; 2118 2119 #if defined(PETSC_MISSING_LAPACK_GESVD) 2120 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2121 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2122 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2123 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2124 #if defined(PETSC_USE_COMPLEX) 2125 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2126 #endif 2127 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2128 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2129 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2130 lwork = -1; 2131 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2132 #if !defined(PETSC_USE_COMPLEX) 2133 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2134 #else 2135 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2136 #endif 2137 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2138 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2139 #else /* on missing GESVD */ 2140 /* SVD */ 2141 PetscInt max_n,min_n; 2142 max_n = max_size_of_constraint; 2143 min_n = max_constraints; 2144 if (max_size_of_constraint < max_constraints) { 2145 min_n = max_size_of_constraint; 2146 max_n = max_constraints; 2147 } 2148 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2149 #if defined(PETSC_USE_COMPLEX) 2150 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2151 #endif 2152 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2153 lwork = -1; 2154 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2155 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2156 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2157 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2158 #if !defined(PETSC_USE_COMPLEX) 2159 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2160 #else 2161 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2162 #endif 2163 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2164 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2165 #endif /* on missing GESVD */ 2166 /* Allocate optimal workspace */ 2167 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2168 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2169 } 2170 /* Now we can loop on constraining sets */ 2171 total_counts = 0; 2172 constraints_idxs_ptr[0] = 0; 2173 constraints_data_ptr[0] = 0; 2174 /* vertices */ 2175 if (n_vertices) { 2176 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2177 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2178 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2179 for (i=0;i<n_vertices;i++) { 2180 constraints_n[total_counts] = 1; 2181 constraints_data[total_counts] = 1.0; 2182 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2183 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2184 total_counts++; 2185 } 2186 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2187 PetscBool used_vertex; 2188 for (i=0;i<n_vertices;i++) { 2189 used_vertex = PETSC_FALSE; 2190 k = 0; 2191 while (!used_vertex && k<nnsp_size) { 2192 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2193 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2194 constraints_n[total_counts] = 1; 2195 constraints_idxs[total_counts] = is_indices[i]; 2196 constraints_data[total_counts] = 1.0; 2197 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2198 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2199 total_counts++; 2200 used_vertex = PETSC_TRUE; 2201 } 2202 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2203 k++; 2204 } 2205 } 2206 } 2207 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2208 n_vertices = total_counts; 2209 } 2210 2211 /* edges and faces */ 2212 total_counts_cc = total_counts; 2213 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2214 IS used_is; 2215 PetscBool idxs_copied = PETSC_FALSE; 2216 2217 if (ncc<n_ISForEdges) { 2218 used_is = ISForEdges[ncc]; 2219 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2220 } else { 2221 used_is = ISForFaces[ncc-n_ISForEdges]; 2222 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2223 } 2224 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2225 2226 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2227 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2228 /* change of basis should not be performed on local periodic nodes */ 2229 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2230 if (nnsp_has_cnst) { 2231 PetscScalar quad_value; 2232 2233 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2234 idxs_copied = PETSC_TRUE; 2235 2236 if (!pcbddc->use_nnsp_true) { 2237 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2238 } else { 2239 quad_value = 1.0; 2240 } 2241 for (j=0;j<size_of_constraint;j++) { 2242 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2243 } 2244 temp_constraints++; 2245 total_counts++; 2246 } 2247 for (k=0;k<nnsp_size;k++) { 2248 PetscReal real_value; 2249 PetscScalar *ptr_to_data; 2250 2251 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2252 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2253 for (j=0;j<size_of_constraint;j++) { 2254 ptr_to_data[j] = array[is_indices[j]]; 2255 } 2256 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2257 /* check if array is null on the connected component */ 2258 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2259 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2260 if (real_value > 0.0) { /* keep indices and values */ 2261 temp_constraints++; 2262 total_counts++; 2263 if (!idxs_copied) { 2264 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2265 idxs_copied = PETSC_TRUE; 2266 } 2267 } 2268 } 2269 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2270 valid_constraints = temp_constraints; 2271 if (!pcbddc->use_nnsp_true && temp_constraints) { 2272 if (temp_constraints == 1) { /* just normalize the constraint */ 2273 PetscScalar norm,*ptr_to_data; 2274 2275 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2276 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2277 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2278 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2279 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2280 } else { /* perform SVD */ 2281 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2282 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2283 2284 #if defined(PETSC_MISSING_LAPACK_GESVD) 2285 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2286 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2287 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2288 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2289 from that computed using LAPACKgesvd 2290 -> This is due to a different computation of eigenvectors in LAPACKheev 2291 -> The quality of the POD-computed basis will be the same */ 2292 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2293 /* Store upper triangular part of correlation matrix */ 2294 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2295 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2296 for (j=0;j<temp_constraints;j++) { 2297 for (k=0;k<j+1;k++) { 2298 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2299 } 2300 } 2301 /* compute eigenvalues and eigenvectors of correlation matrix */ 2302 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2303 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2304 #if !defined(PETSC_USE_COMPLEX) 2305 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2306 #else 2307 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2308 #endif 2309 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2310 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2311 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2312 j = 0; 2313 while (j < temp_constraints && singular_vals[j] < tol) j++; 2314 total_counts = total_counts-j; 2315 valid_constraints = temp_constraints-j; 2316 /* scale and copy POD basis into used quadrature memory */ 2317 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2318 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2319 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2320 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2321 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2322 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2323 if (j<temp_constraints) { 2324 PetscInt ii; 2325 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2326 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2327 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2328 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2329 for (k=0;k<temp_constraints-j;k++) { 2330 for (ii=0;ii<size_of_constraint;ii++) { 2331 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2332 } 2333 } 2334 } 2335 #else /* on missing GESVD */ 2336 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2337 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2338 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2339 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2340 #if !defined(PETSC_USE_COMPLEX) 2341 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2342 #else 2343 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2344 #endif 2345 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2346 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2347 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2348 k = temp_constraints; 2349 if (k > size_of_constraint) k = size_of_constraint; 2350 j = 0; 2351 while (j < k && singular_vals[k-j-1] < tol) j++; 2352 valid_constraints = k-j; 2353 total_counts = total_counts-temp_constraints+valid_constraints; 2354 #endif /* on missing GESVD */ 2355 } 2356 } 2357 /* update pointers information */ 2358 if (valid_constraints) { 2359 constraints_n[total_counts_cc] = valid_constraints; 2360 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2361 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2362 /* set change_of_basis flag */ 2363 if (boolforchange) { 2364 PetscBTSet(change_basis,total_counts_cc); 2365 } 2366 total_counts_cc++; 2367 } 2368 } 2369 /* free workspace */ 2370 if (!skip_lapack) { 2371 ierr = PetscFree(work);CHKERRQ(ierr); 2372 #if defined(PETSC_USE_COMPLEX) 2373 ierr = PetscFree(rwork);CHKERRQ(ierr); 2374 #endif 2375 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2376 #if defined(PETSC_MISSING_LAPACK_GESVD) 2377 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2378 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2379 #endif 2380 } 2381 for (k=0;k<nnsp_size;k++) { 2382 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2383 } 2384 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2385 /* free index sets of faces, edges and vertices */ 2386 for (i=0;i<n_ISForFaces;i++) { 2387 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2388 } 2389 if (n_ISForFaces) { 2390 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2391 } 2392 for (i=0;i<n_ISForEdges;i++) { 2393 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2394 } 2395 if (n_ISForEdges) { 2396 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2397 } 2398 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2399 } else { 2400 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2401 2402 total_counts = 0; 2403 n_vertices = 0; 2404 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2405 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2406 } 2407 max_constraints = 0; 2408 total_counts_cc = 0; 2409 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2410 total_counts += pcbddc->adaptive_constraints_n[i]; 2411 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2412 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2413 } 2414 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2415 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2416 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2417 constraints_data = pcbddc->adaptive_constraints_data; 2418 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2419 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2420 total_counts_cc = 0; 2421 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2422 if (pcbddc->adaptive_constraints_n[i]) { 2423 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2424 } 2425 } 2426 #if 0 2427 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2428 for (i=0;i<total_counts_cc;i++) { 2429 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2430 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2431 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2432 printf(" %d",constraints_idxs[j]); 2433 } 2434 printf("\n"); 2435 printf("number of cc: %d\n",constraints_n[i]); 2436 } 2437 for (i=0;i<n_vertices;i++) { 2438 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2439 } 2440 for (i=0;i<sub_schurs->n_subs;i++) { 2441 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2442 } 2443 #endif 2444 2445 max_size_of_constraint = 0; 2446 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2447 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2448 /* Change of basis */ 2449 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2450 if (pcbddc->use_change_of_basis) { 2451 for (i=0;i<sub_schurs->n_subs;i++) { 2452 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2453 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2454 } 2455 } 2456 } 2457 } 2458 pcbddc->local_primal_size = total_counts; 2459 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2460 2461 /* map constraints_idxs in boundary numbering */ 2462 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2463 if (i != constraints_idxs_ptr[total_counts_cc]) { 2464 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2465 } 2466 2467 /* Create constraint matrix */ 2468 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2469 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2470 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2471 2472 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2473 /* determine if a QR strategy is needed for change of basis */ 2474 qr_needed = PETSC_FALSE; 2475 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2476 total_primal_vertices=0; 2477 pcbddc->local_primal_size_cc = 0; 2478 for (i=0;i<total_counts_cc;i++) { 2479 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2480 if (size_of_constraint == 1) { 2481 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2482 pcbddc->local_primal_size_cc += 1; 2483 } else if (PetscBTLookup(change_basis,i)) { 2484 for (k=0;k<constraints_n[i];k++) { 2485 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2486 } 2487 pcbddc->local_primal_size_cc += constraints_n[i]; 2488 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2489 PetscBTSet(qr_needed_idx,i); 2490 qr_needed = PETSC_TRUE; 2491 } 2492 } else { 2493 pcbddc->local_primal_size_cc += 1; 2494 } 2495 } 2496 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2497 pcbddc->n_vertices = total_primal_vertices; 2498 /* permute indices in order to have a sorted set of vertices */ 2499 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2500 2501 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2502 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2503 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2504 2505 /* nonzero structure of constraint matrix */ 2506 /* and get reference dof for local constraints */ 2507 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2508 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2509 2510 j = total_primal_vertices; 2511 total_counts = total_primal_vertices; 2512 cum = total_primal_vertices; 2513 for (i=n_vertices;i<total_counts_cc;i++) { 2514 if (!PetscBTLookup(change_basis,i)) { 2515 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2516 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2517 cum++; 2518 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2519 for (k=0;k<constraints_n[i];k++) { 2520 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2521 nnz[j+k] = size_of_constraint; 2522 } 2523 j += constraints_n[i]; 2524 } 2525 } 2526 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2527 ierr = PetscFree(nnz);CHKERRQ(ierr); 2528 2529 /* set values in constraint matrix */ 2530 for (i=0;i<total_primal_vertices;i++) { 2531 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2532 } 2533 total_counts = total_primal_vertices; 2534 for (i=n_vertices;i<total_counts_cc;i++) { 2535 if (!PetscBTLookup(change_basis,i)) { 2536 PetscInt *cols; 2537 2538 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2539 cols = constraints_idxs+constraints_idxs_ptr[i]; 2540 for (k=0;k<constraints_n[i];k++) { 2541 PetscInt row = total_counts+k; 2542 PetscScalar *vals; 2543 2544 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2545 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2546 } 2547 total_counts += constraints_n[i]; 2548 } 2549 } 2550 /* assembling */ 2551 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2552 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2553 2554 /* 2555 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2556 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2557 */ 2558 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2559 if (pcbddc->use_change_of_basis) { 2560 /* dual and primal dofs on a single cc */ 2561 PetscInt dual_dofs,primal_dofs; 2562 /* working stuff for GEQRF */ 2563 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2564 PetscBLASInt lqr_work; 2565 /* working stuff for UNGQR */ 2566 PetscScalar *gqr_work,lgqr_work_t; 2567 PetscBLASInt lgqr_work; 2568 /* working stuff for TRTRS */ 2569 PetscScalar *trs_rhs; 2570 PetscBLASInt Blas_NRHS; 2571 /* pointers for values insertion into change of basis matrix */ 2572 PetscInt *start_rows,*start_cols; 2573 PetscScalar *start_vals; 2574 /* working stuff for values insertion */ 2575 PetscBT is_primal; 2576 PetscInt *aux_primal_numbering_B; 2577 /* matrix sizes */ 2578 PetscInt global_size,local_size; 2579 /* temporary change of basis */ 2580 Mat localChangeOfBasisMatrix; 2581 /* extra space for debugging */ 2582 PetscScalar *dbg_work; 2583 2584 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2585 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2586 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2587 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2588 /* nonzeros for local mat */ 2589 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2590 for (i=0;i<pcis->n;i++) nnz[i]=1; 2591 for (i=n_vertices;i<total_counts_cc;i++) { 2592 if (PetscBTLookup(change_basis,i)) { 2593 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2594 if (PetscBTLookup(qr_needed_idx,i)) { 2595 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2596 } else { 2597 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2598 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2599 } 2600 } 2601 } 2602 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2603 ierr = PetscFree(nnz);CHKERRQ(ierr); 2604 /* Set initial identity in the matrix */ 2605 for (i=0;i<pcis->n;i++) { 2606 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2607 } 2608 2609 if (pcbddc->dbg_flag) { 2610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2612 } 2613 2614 2615 /* Now we loop on the constraints which need a change of basis */ 2616 /* 2617 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2618 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2619 2620 Basic blocks of change of basis matrix T computed by 2621 2622 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2623 2624 | 1 0 ... 0 s_1/S | 2625 | 0 1 ... 0 s_2/S | 2626 | ... | 2627 | 0 ... 1 s_{n-1}/S | 2628 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2629 2630 with S = \sum_{i=1}^n s_i^2 2631 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2632 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2633 2634 - QR decomposition of constraints otherwise 2635 */ 2636 if (qr_needed) { 2637 /* space to store Q */ 2638 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2639 /* first we issue queries for optimal work */ 2640 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2641 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2642 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2643 lqr_work = -1; 2644 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2645 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2646 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2647 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2648 lgqr_work = -1; 2649 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2650 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2651 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2652 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2653 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2654 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2655 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2656 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2657 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2658 /* array to store scaling factors for reflectors */ 2659 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2660 /* array to store rhs and solution of triangular solver */ 2661 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2662 /* allocating workspace for check */ 2663 if (pcbddc->dbg_flag) { 2664 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2665 } 2666 } 2667 /* array to store whether a node is primal or not */ 2668 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2669 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2670 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2671 if (i != total_primal_vertices) { 2672 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2673 } 2674 for (i=0;i<total_primal_vertices;i++) { 2675 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2676 } 2677 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2678 2679 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2680 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2681 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2682 if (PetscBTLookup(change_basis,total_counts)) { 2683 /* get constraint info */ 2684 primal_dofs = constraints_n[total_counts]; 2685 dual_dofs = size_of_constraint-primal_dofs; 2686 2687 if (pcbddc->dbg_flag) { 2688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2689 } 2690 2691 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2692 2693 /* copy quadrature constraints for change of basis check */ 2694 if (pcbddc->dbg_flag) { 2695 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2696 } 2697 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2698 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2699 2700 /* compute QR decomposition of constraints */ 2701 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2702 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2703 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2704 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2705 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2706 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2707 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2708 2709 /* explictly compute R^-T */ 2710 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2711 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2712 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2713 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2714 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2715 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2716 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2717 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2718 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2719 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2720 2721 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2722 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2723 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2724 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2725 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2726 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2727 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2728 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2729 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2730 2731 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2732 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2733 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2734 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2735 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2736 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2737 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2738 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2739 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2740 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2741 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2742 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2743 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2744 2745 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2746 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2747 /* insert cols for primal dofs */ 2748 for (j=0;j<primal_dofs;j++) { 2749 start_vals = &qr_basis[j*size_of_constraint]; 2750 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2751 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2752 } 2753 /* insert cols for dual dofs */ 2754 for (j=0,k=0;j<dual_dofs;k++) { 2755 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2756 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2757 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2758 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2759 j++; 2760 } 2761 } 2762 2763 /* check change of basis */ 2764 if (pcbddc->dbg_flag) { 2765 PetscInt ii,jj; 2766 PetscBool valid_qr=PETSC_TRUE; 2767 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2768 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2769 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2770 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2771 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2772 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2773 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2774 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2775 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2776 for (jj=0;jj<size_of_constraint;jj++) { 2777 for (ii=0;ii<primal_dofs;ii++) { 2778 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2779 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2780 } 2781 } 2782 if (!valid_qr) { 2783 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2784 for (jj=0;jj<size_of_constraint;jj++) { 2785 for (ii=0;ii<primal_dofs;ii++) { 2786 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2787 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2788 } 2789 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2790 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2791 } 2792 } 2793 } 2794 } else { 2795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2796 } 2797 } 2798 } else { /* simple transformation block */ 2799 PetscInt row,col; 2800 PetscScalar val,norm; 2801 2802 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2803 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2804 for (j=0;j<size_of_constraint;j++) { 2805 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2806 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2807 if (!PetscBTLookup(is_primal,row_B)) { 2808 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2809 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2810 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2811 } else { 2812 for (k=0;k<size_of_constraint;k++) { 2813 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2814 if (row != col) { 2815 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2816 } else { 2817 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2818 } 2819 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2820 } 2821 } 2822 } 2823 if (pcbddc->dbg_flag) { 2824 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2825 } 2826 } 2827 } else { 2828 if (pcbddc->dbg_flag) { 2829 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2830 } 2831 } 2832 } 2833 2834 /* free workspace */ 2835 if (qr_needed) { 2836 if (pcbddc->dbg_flag) { 2837 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2838 } 2839 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2840 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2841 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2842 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2843 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2844 } 2845 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2846 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2847 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2848 2849 /* assembling of global change of variable */ 2850 { 2851 Mat tmat; 2852 PetscInt bs; 2853 2854 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2855 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2856 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2857 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2858 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2859 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2860 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2861 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2862 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2863 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2864 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2865 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2866 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2867 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2868 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2869 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2870 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2871 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2872 } 2873 /* check */ 2874 if (pcbddc->dbg_flag) { 2875 PetscReal error; 2876 Vec x,x_change; 2877 2878 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2879 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2880 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2881 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2882 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2883 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2884 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2885 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2886 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2887 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2888 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2889 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2890 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2891 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2892 ierr = VecDestroy(&x);CHKERRQ(ierr); 2893 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2894 } 2895 2896 /* adapt sub_schurs computed (if any) */ 2897 if (pcbddc->use_deluxe_scaling) { 2898 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2899 if (sub_schurs->S_Ej_all) { 2900 Mat S_new,tmat; 2901 IS is_all_N; 2902 2903 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2904 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2905 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2906 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2907 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2908 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2909 sub_schurs->S_Ej_all = S_new; 2910 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2911 if (sub_schurs->sum_S_Ej_all) { 2912 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2913 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2914 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2915 sub_schurs->sum_S_Ej_all = S_new; 2916 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2917 } 2918 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2919 } 2920 } 2921 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2922 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2923 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2924 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2925 } 2926 2927 /* set up change of basis context */ 2928 if (pcbddc->ChangeOfBasisMatrix) { 2929 PCBDDCChange_ctx change_ctx; 2930 2931 if (!pcbddc->new_global_mat) { 2932 PetscInt global_size,local_size; 2933 2934 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2935 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2936 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2937 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2938 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2939 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2940 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2941 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2942 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2943 } else { 2944 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2945 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2946 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2947 } 2948 if (!pcbddc->user_ChangeOfBasisMatrix) { 2949 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2950 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2951 } else { 2952 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2953 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2954 } 2955 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2956 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2957 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2958 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2959 } 2960 2961 /* check if a new primal space has been introduced */ 2962 pcbddc->new_primal_space_local = PETSC_TRUE; 2963 if (olocal_primal_size == pcbddc->local_primal_size) { 2964 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2965 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2966 if (!pcbddc->new_primal_space_local) { 2967 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2968 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2969 } 2970 } 2971 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2972 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2973 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2974 2975 /* flush dbg viewer */ 2976 if (pcbddc->dbg_flag) { 2977 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2978 } 2979 2980 /* free workspace */ 2981 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2982 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2983 if (!pcbddc->adaptive_selection) { 2984 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2985 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2986 } else { 2987 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2988 pcbddc->adaptive_constraints_idxs_ptr, 2989 pcbddc->adaptive_constraints_data_ptr, 2990 pcbddc->adaptive_constraints_idxs, 2991 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2992 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2993 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2994 } 2995 PetscFunctionReturn(0); 2996 } 2997 2998 #undef __FUNCT__ 2999 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3000 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3001 { 3002 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3003 PC_IS *pcis = (PC_IS*)pc->data; 3004 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3005 PetscInt ierr,i,vertex_size,N; 3006 PetscViewer viewer=pcbddc->dbg_viewer; 3007 3008 PetscFunctionBegin; 3009 /* Reset previously computed graph */ 3010 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3011 /* Init local Graph struct */ 3012 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3013 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 3014 3015 /* Check validity of the csr graph passed in by the user */ 3016 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3017 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3018 } 3019 3020 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3021 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3022 PetscInt *xadj,*adjncy; 3023 PetscInt nvtxs; 3024 PetscBool flg_row=PETSC_FALSE; 3025 3026 if (pcbddc->use_local_adj) { 3027 3028 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3029 if (flg_row) { 3030 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3031 pcbddc->computed_rowadj = PETSC_TRUE; 3032 } 3033 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3034 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3035 IS is_dummy; 3036 ISLocalToGlobalMapping l2gmap_dummy; 3037 PetscInt j,sum; 3038 PetscInt *cxadj,*cadjncy; 3039 const PetscInt *idxs; 3040 PCBDDCGraph graph; 3041 PetscBT is_on_boundary; 3042 3043 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3044 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3045 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3046 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3047 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3048 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3049 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3050 if (flg_row) { 3051 graph->xadj = xadj; 3052 graph->adjncy = adjncy; 3053 } 3054 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3055 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3056 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3057 3058 if (pcbddc->dbg_flag) { 3059 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3060 for (i=0;i<graph->ncc;i++) { 3061 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3062 } 3063 } 3064 3065 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3066 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3067 for (i=0;i<pcis->n_B;i++) { 3068 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3069 } 3070 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3071 3072 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3073 sum = 0; 3074 for (i=0;i<graph->ncc;i++) { 3075 PetscInt sizecc = 0; 3076 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3077 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3078 sizecc++; 3079 } 3080 } 3081 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3082 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3083 cxadj[graph->queue[j]] = sizecc; 3084 } 3085 } 3086 sum += sizecc*sizecc; 3087 } 3088 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3089 sum = 0; 3090 for (i=0;i<pcis->n;i++) { 3091 PetscInt temp = cxadj[i]; 3092 cxadj[i] = sum; 3093 sum += temp; 3094 } 3095 cxadj[pcis->n] = sum; 3096 for (i=0;i<graph->ncc;i++) { 3097 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3098 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3099 PetscInt k,sizecc = 0; 3100 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3101 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3102 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3103 sizecc++; 3104 } 3105 } 3106 } 3107 } 3108 } 3109 if (sum) { 3110 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3111 } else { 3112 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3113 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3114 } 3115 graph->xadj = 0; 3116 graph->adjncy = 0; 3117 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3118 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3119 } 3120 } 3121 if (pcbddc->dbg_flag) { 3122 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3123 } 3124 3125 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3126 vertex_size = 1; 3127 if (pcbddc->user_provided_isfordofs) { 3128 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3129 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3130 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3131 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3132 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3133 } 3134 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3135 pcbddc->n_ISForDofs = 0; 3136 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3137 } 3138 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3139 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3140 } else { 3141 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3142 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3143 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3144 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3145 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3146 } 3147 } 3148 } 3149 3150 /* Setup of Graph */ 3151 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3152 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3153 } 3154 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3155 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3156 } 3157 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3158 3159 /* Graph's connected components analysis */ 3160 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3161 3162 /* print some info to stdout */ 3163 if (pcbddc->dbg_flag) { 3164 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3165 } 3166 3167 /* mark topography has done */ 3168 pcbddc->recompute_topography = PETSC_FALSE; 3169 PetscFunctionReturn(0); 3170 } 3171 3172 #undef __FUNCT__ 3173 #define __FUNCT__ "PCBDDCSubsetNumbering" 3174 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3175 { 3176 Vec local_vec,global_vec; 3177 IS seqis,paris; 3178 VecScatter scatter_ctx; 3179 PetscScalar *array; 3180 PetscInt *temp_global_dofs; 3181 PetscScalar globalsum; 3182 PetscInt i,j,s; 3183 PetscInt nlocals,first_index,old_index,max_local,max_global; 3184 PetscMPIInt rank_prec_comm,size_prec_comm; 3185 PetscInt *dof_sizes,*dof_displs; 3186 PetscBool first_found; 3187 PetscErrorCode ierr; 3188 3189 PetscFunctionBegin; 3190 /* mpi buffers */ 3191 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3192 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3193 j = ( !rank_prec_comm ? size_prec_comm : 0); 3194 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3195 /* get maximum size of subset */ 3196 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3197 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3198 max_local = 0; 3199 for (i=0;i<n_local_dofs;i++) { 3200 if (max_local < temp_global_dofs[i] ) { 3201 max_local = temp_global_dofs[i]; 3202 } 3203 } 3204 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3205 max_global++; 3206 max_local = 0; 3207 for (i=0;i<n_local_dofs;i++) { 3208 if (max_local < local_dofs[i] ) { 3209 max_local = local_dofs[i]; 3210 } 3211 } 3212 max_local++; 3213 /* allocate workspace */ 3214 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3215 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3216 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3217 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3218 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3219 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3220 /* create scatter */ 3221 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3222 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3223 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3224 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3225 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3226 /* init array */ 3227 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3228 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3229 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3230 if (local_dofs_mult) { 3231 for (i=0;i<n_local_dofs;i++) { 3232 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3233 } 3234 } else { 3235 for (i=0;i<n_local_dofs;i++) { 3236 array[local_dofs[i]]=1.0; 3237 } 3238 } 3239 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3240 /* scatter into global vec and get total number of global dofs */ 3241 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3242 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3243 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3244 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3245 /* Fill global_vec with cumulative function for global numbering */ 3246 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3247 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3248 nlocals = 0; 3249 first_index = -1; 3250 first_found = PETSC_FALSE; 3251 for (i=0;i<s;i++) { 3252 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3253 first_found = PETSC_TRUE; 3254 first_index = i; 3255 } 3256 nlocals += (PetscInt)PetscRealPart(array[i]); 3257 } 3258 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3259 if (!rank_prec_comm) { 3260 dof_displs[0]=0; 3261 for (i=1;i<size_prec_comm;i++) { 3262 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3263 } 3264 } 3265 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3266 if (first_found) { 3267 array[first_index] += (PetscScalar)nlocals; 3268 old_index = first_index; 3269 for (i=first_index+1;i<s;i++) { 3270 if (PetscRealPart(array[i]) > 0.1) { 3271 array[i] += array[old_index]; 3272 old_index = i; 3273 } 3274 } 3275 } 3276 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3277 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3278 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3279 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3280 /* get global ordering of local dofs */ 3281 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3282 if (local_dofs_mult) { 3283 for (i=0;i<n_local_dofs;i++) { 3284 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3285 } 3286 } else { 3287 for (i=0;i<n_local_dofs;i++) { 3288 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3289 } 3290 } 3291 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3292 /* free workspace */ 3293 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3294 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3295 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3296 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3297 /* return pointer to global ordering of local dofs */ 3298 *global_numbering_subset = temp_global_dofs; 3299 PetscFunctionReturn(0); 3300 } 3301 3302 #undef __FUNCT__ 3303 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3304 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3305 { 3306 PetscInt i,j; 3307 PetscScalar *alphas; 3308 PetscErrorCode ierr; 3309 3310 PetscFunctionBegin; 3311 /* this implements stabilized Gram-Schmidt */ 3312 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3313 for (i=0;i<n;i++) { 3314 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3315 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3316 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3317 } 3318 ierr = PetscFree(alphas);CHKERRQ(ierr); 3319 PetscFunctionReturn(0); 3320 } 3321 3322 #undef __FUNCT__ 3323 #define __FUNCT__ "MatISGetSubassemblingPattern" 3324 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3325 { 3326 Mat subdomain_adj; 3327 IS new_ranks,ranks_send_to; 3328 MatPartitioning partitioner; 3329 Mat_IS *matis; 3330 PetscInt n_neighs,*neighs,*n_shared,**shared; 3331 PetscInt prank; 3332 PetscMPIInt size,rank,color; 3333 PetscInt *xadj,*adjncy,*oldranks; 3334 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3335 PetscInt i,local_size,threshold=0; 3336 PetscErrorCode ierr; 3337 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3338 PetscSubcomm subcomm; 3339 3340 PetscFunctionBegin; 3341 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3342 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3343 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3344 3345 /* Get info on mapping */ 3346 matis = (Mat_IS*)(mat->data); 3347 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3348 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3349 3350 /* build local CSR graph of subdomains' connectivity */ 3351 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3352 xadj[0] = 0; 3353 xadj[1] = PetscMax(n_neighs-1,0); 3354 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3355 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3356 3357 if (threshold) { 3358 PetscInt xadj_count = 0; 3359 for (i=1;i<n_neighs;i++) { 3360 if (n_shared[i] > threshold) { 3361 adjncy[xadj_count] = neighs[i]; 3362 adjncy_wgt[xadj_count] = n_shared[i]; 3363 xadj_count++; 3364 } 3365 } 3366 xadj[1] = xadj_count; 3367 } else { 3368 if (xadj[1]) { 3369 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3370 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3371 } 3372 } 3373 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3374 if (use_square) { 3375 for (i=0;i<xadj[1];i++) { 3376 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3377 } 3378 } 3379 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3380 3381 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3382 3383 /* 3384 Restrict work on active processes only. 3385 */ 3386 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3387 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3388 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3389 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3390 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3391 if (color) { 3392 ierr = PetscFree(xadj);CHKERRQ(ierr); 3393 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3394 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3395 } else { 3396 PetscInt coarsening_ratio; 3397 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3398 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3399 prank = rank; 3400 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3401 /* 3402 for (i=0;i<size;i++) { 3403 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3404 } 3405 */ 3406 for (i=0;i<xadj[1];i++) { 3407 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3408 } 3409 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3410 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3411 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3412 3413 /* Partition */ 3414 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3415 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3416 if (use_vwgt) { 3417 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3418 v_wgt[0] = local_size; 3419 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3420 } 3421 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3422 coarsening_ratio = size/n_subdomains; 3423 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3424 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3425 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3426 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3427 3428 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3429 if (contiguous) { 3430 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3431 } else { 3432 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3433 } 3434 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3435 /* clean up */ 3436 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3437 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3438 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3439 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3440 } 3441 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3442 3443 /* assemble parallel IS for sends */ 3444 i = 1; 3445 if (color) i=0; 3446 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3447 3448 /* get back IS */ 3449 *is_sends = ranks_send_to; 3450 PetscFunctionReturn(0); 3451 } 3452 3453 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3454 3455 #undef __FUNCT__ 3456 #define __FUNCT__ "MatISSubassemble" 3457 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3458 { 3459 Mat local_mat; 3460 Mat_IS *matis; 3461 IS is_sends_internal; 3462 PetscInt rows,cols,new_local_rows; 3463 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3464 PetscBool ismatis,isdense,newisdense,destroy_mat; 3465 ISLocalToGlobalMapping l2gmap; 3466 PetscInt* l2gmap_indices; 3467 const PetscInt* is_indices; 3468 MatType new_local_type; 3469 /* buffers */ 3470 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3471 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3472 PetscInt *recv_buffer_idxs_local; 3473 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3474 /* MPI */ 3475 MPI_Comm comm,comm_n; 3476 PetscSubcomm subcomm; 3477 PetscMPIInt n_sends,n_recvs,commsize; 3478 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3479 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3480 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3481 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3482 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3483 PetscErrorCode ierr; 3484 3485 PetscFunctionBegin; 3486 /* TODO: add missing checks */ 3487 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3488 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3489 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3490 PetscValidLogicalCollectiveInt(mat,nis,7); 3491 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3492 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3493 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3494 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3495 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3496 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3497 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3498 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3499 PetscInt mrows,mcols,mnrows,mncols; 3500 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3501 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3502 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3503 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3504 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3505 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3506 } 3507 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3508 PetscValidLogicalCollectiveInt(mat,bs,0); 3509 /* prepare IS for sending if not provided */ 3510 if (!is_sends) { 3511 PetscBool pcontig = PETSC_TRUE; 3512 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3513 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3514 } else { 3515 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3516 is_sends_internal = is_sends; 3517 } 3518 3519 /* get pointer of MATIS data */ 3520 matis = (Mat_IS*)mat->data; 3521 3522 /* get comm */ 3523 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3524 3525 /* compute number of sends */ 3526 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3527 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3528 3529 /* compute number of receives */ 3530 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3531 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3532 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3533 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3534 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3535 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3536 ierr = PetscFree(iflags);CHKERRQ(ierr); 3537 3538 /* restrict comm if requested */ 3539 subcomm = 0; 3540 destroy_mat = PETSC_FALSE; 3541 if (restrict_comm) { 3542 PetscMPIInt color,subcommsize; 3543 3544 color = 0; 3545 if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */ 3546 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3547 subcommsize = commsize - subcommsize; 3548 /* check if reuse has been requested */ 3549 if (reuse == MAT_REUSE_MATRIX) { 3550 if (*mat_n) { 3551 PetscMPIInt subcommsize2; 3552 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3553 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3554 comm_n = PetscObjectComm((PetscObject)*mat_n); 3555 } else { 3556 comm_n = PETSC_COMM_SELF; 3557 } 3558 } else { /* MAT_INITIAL_MATRIX */ 3559 PetscMPIInt rank; 3560 3561 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3562 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3563 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3564 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3565 comm_n = PetscSubcommChild(subcomm); 3566 } 3567 /* flag to destroy *mat_n if not significative */ 3568 if (color) destroy_mat = PETSC_TRUE; 3569 } else { 3570 comm_n = comm; 3571 } 3572 3573 /* prepare send/receive buffers */ 3574 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3575 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3576 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3577 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3578 if (nis) { 3579 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3580 } 3581 3582 /* Get data from local matrices */ 3583 if (!isdense) { 3584 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3585 /* TODO: See below some guidelines on how to prepare the local buffers */ 3586 /* 3587 send_buffer_vals should contain the raw values of the local matrix 3588 send_buffer_idxs should contain: 3589 - MatType_PRIVATE type 3590 - PetscInt size_of_l2gmap 3591 - PetscInt global_row_indices[size_of_l2gmap] 3592 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3593 */ 3594 } else { 3595 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3596 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3597 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3598 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3599 send_buffer_idxs[1] = i; 3600 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3601 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3602 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3603 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3604 for (i=0;i<n_sends;i++) { 3605 ilengths_vals[is_indices[i]] = len*len; 3606 ilengths_idxs[is_indices[i]] = len+2; 3607 } 3608 } 3609 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3610 /* additional is (if any) */ 3611 if (nis) { 3612 PetscMPIInt psum; 3613 PetscInt j; 3614 for (j=0,psum=0;j<nis;j++) { 3615 PetscInt plen; 3616 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3617 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3618 psum += len+1; /* indices + lenght */ 3619 } 3620 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3621 for (j=0,psum=0;j<nis;j++) { 3622 PetscInt plen; 3623 const PetscInt *is_array_idxs; 3624 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3625 send_buffer_idxs_is[psum] = plen; 3626 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3627 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3628 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3629 psum += plen+1; /* indices + lenght */ 3630 } 3631 for (i=0;i<n_sends;i++) { 3632 ilengths_idxs_is[is_indices[i]] = psum; 3633 } 3634 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3635 } 3636 3637 buf_size_idxs = 0; 3638 buf_size_vals = 0; 3639 buf_size_idxs_is = 0; 3640 for (i=0;i<n_recvs;i++) { 3641 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3642 buf_size_vals += (PetscInt)olengths_vals[i]; 3643 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3644 } 3645 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3646 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3647 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3648 3649 /* get new tags for clean communications */ 3650 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3651 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3652 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3653 3654 /* allocate for requests */ 3655 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3656 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3657 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3658 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3659 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3660 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3661 3662 /* communications */ 3663 ptr_idxs = recv_buffer_idxs; 3664 ptr_vals = recv_buffer_vals; 3665 ptr_idxs_is = recv_buffer_idxs_is; 3666 for (i=0;i<n_recvs;i++) { 3667 source_dest = onodes[i]; 3668 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3669 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3670 ptr_idxs += olengths_idxs[i]; 3671 ptr_vals += olengths_vals[i]; 3672 if (nis) { 3673 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3674 ptr_idxs_is += olengths_idxs_is[i]; 3675 } 3676 } 3677 for (i=0;i<n_sends;i++) { 3678 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3679 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3680 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3681 if (nis) { 3682 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3683 } 3684 } 3685 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3686 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3687 3688 /* assemble new l2g map */ 3689 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3690 ptr_idxs = recv_buffer_idxs; 3691 new_local_rows = 0; 3692 for (i=0;i<n_recvs;i++) { 3693 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3694 ptr_idxs += olengths_idxs[i]; 3695 } 3696 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3697 ptr_idxs = recv_buffer_idxs; 3698 new_local_rows = 0; 3699 for (i=0;i<n_recvs;i++) { 3700 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3701 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3702 ptr_idxs += olengths_idxs[i]; 3703 } 3704 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3705 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3706 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3707 3708 /* infer new local matrix type from received local matrices type */ 3709 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3710 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3711 if (n_recvs) { 3712 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3713 ptr_idxs = recv_buffer_idxs; 3714 for (i=0;i<n_recvs;i++) { 3715 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3716 new_local_type_private = MATAIJ_PRIVATE; 3717 break; 3718 } 3719 ptr_idxs += olengths_idxs[i]; 3720 } 3721 switch (new_local_type_private) { 3722 case MATDENSE_PRIVATE: 3723 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3724 new_local_type = MATSEQAIJ; 3725 bs = 1; 3726 } else { /* if I receive only 1 dense matrix */ 3727 new_local_type = MATSEQDENSE; 3728 bs = 1; 3729 } 3730 break; 3731 case MATAIJ_PRIVATE: 3732 new_local_type = MATSEQAIJ; 3733 bs = 1; 3734 break; 3735 case MATBAIJ_PRIVATE: 3736 new_local_type = MATSEQBAIJ; 3737 break; 3738 case MATSBAIJ_PRIVATE: 3739 new_local_type = MATSEQSBAIJ; 3740 break; 3741 default: 3742 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3743 break; 3744 } 3745 } else { /* by default, new_local_type is seqdense */ 3746 new_local_type = MATSEQDENSE; 3747 bs = 1; 3748 } 3749 3750 /* create MATIS object if needed */ 3751 if (reuse == MAT_INITIAL_MATRIX) { 3752 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3753 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3754 } else { 3755 /* it also destroys the local matrices */ 3756 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3757 } 3758 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3759 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3760 3761 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3762 3763 /* Global to local map of received indices */ 3764 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3765 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3766 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3767 3768 /* restore attributes -> type of incoming data and its size */ 3769 buf_size_idxs = 0; 3770 for (i=0;i<n_recvs;i++) { 3771 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3772 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3773 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3774 } 3775 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3776 3777 /* set preallocation */ 3778 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3779 if (!newisdense) { 3780 PetscInt *new_local_nnz=0; 3781 3782 ptr_vals = recv_buffer_vals; 3783 ptr_idxs = recv_buffer_idxs_local; 3784 if (n_recvs) { 3785 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3786 } 3787 for (i=0;i<n_recvs;i++) { 3788 PetscInt j; 3789 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3790 for (j=0;j<*(ptr_idxs+1);j++) { 3791 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3792 } 3793 } else { 3794 /* TODO */ 3795 } 3796 ptr_idxs += olengths_idxs[i]; 3797 } 3798 if (new_local_nnz) { 3799 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3800 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3801 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3802 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3803 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3804 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3805 } else { 3806 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3807 } 3808 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3809 } else { 3810 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3811 } 3812 3813 /* set values */ 3814 ptr_vals = recv_buffer_vals; 3815 ptr_idxs = recv_buffer_idxs_local; 3816 for (i=0;i<n_recvs;i++) { 3817 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3818 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3819 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3820 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3821 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3822 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3823 } else { 3824 /* TODO */ 3825 } 3826 ptr_idxs += olengths_idxs[i]; 3827 ptr_vals += olengths_vals[i]; 3828 } 3829 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3830 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3831 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3832 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3833 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3834 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3835 3836 #if 0 3837 if (!restrict_comm) { /* check */ 3838 Vec lvec,rvec; 3839 PetscReal infty_error; 3840 3841 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3842 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3843 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3844 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3845 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3846 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3847 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3848 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3849 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3850 } 3851 #endif 3852 3853 /* assemble new additional is (if any) */ 3854 if (nis) { 3855 PetscInt **temp_idxs,*count_is,j,psum; 3856 3857 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3858 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3859 ptr_idxs = recv_buffer_idxs_is; 3860 psum = 0; 3861 for (i=0;i<n_recvs;i++) { 3862 for (j=0;j<nis;j++) { 3863 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3864 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3865 psum += plen; 3866 ptr_idxs += plen+1; /* shift pointer to received data */ 3867 } 3868 } 3869 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3870 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3871 for (i=1;i<nis;i++) { 3872 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3873 } 3874 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3875 ptr_idxs = recv_buffer_idxs_is; 3876 for (i=0;i<n_recvs;i++) { 3877 for (j=0;j<nis;j++) { 3878 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3879 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3880 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3881 ptr_idxs += plen+1; /* shift pointer to received data */ 3882 } 3883 } 3884 for (i=0;i<nis;i++) { 3885 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3886 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3887 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3888 } 3889 ierr = PetscFree(count_is);CHKERRQ(ierr); 3890 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3891 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3892 } 3893 /* free workspace */ 3894 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3895 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3896 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3897 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3898 if (isdense) { 3899 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3900 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3901 } else { 3902 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3903 } 3904 if (nis) { 3905 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3906 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3907 } 3908 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3909 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3910 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3911 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3912 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3913 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3914 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3915 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3916 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3917 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3918 ierr = PetscFree(onodes);CHKERRQ(ierr); 3919 if (nis) { 3920 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3921 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3922 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3923 } 3924 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3925 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3926 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3927 for (i=0;i<nis;i++) { 3928 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3929 } 3930 } 3931 PetscFunctionReturn(0); 3932 } 3933 3934 /* temporary hack into ksp private data structure */ 3935 #include <petsc/private/kspimpl.h> 3936 3937 #undef __FUNCT__ 3938 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3939 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3940 { 3941 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3942 PC_IS *pcis = (PC_IS*)pc->data; 3943 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3944 MatNullSpace CoarseNullSpace=NULL; 3945 ISLocalToGlobalMapping coarse_islg; 3946 IS coarse_is,*isarray; 3947 PetscInt i,im_active=-1,active_procs=-1; 3948 PetscInt nis,nisdofs,nisneu; 3949 PC pc_temp; 3950 PCType coarse_pc_type; 3951 KSPType coarse_ksp_type; 3952 PetscBool multilevel_requested,multilevel_allowed; 3953 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3954 Mat t_coarse_mat_is; 3955 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3956 PetscMPIInt all_procs; 3957 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3958 PetscBool compute_vecs = PETSC_FALSE; 3959 PetscScalar *array; 3960 PetscErrorCode ierr; 3961 3962 PetscFunctionBegin; 3963 /* Assign global numbering to coarse dofs */ 3964 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3965 PetscInt ocoarse_size; 3966 compute_vecs = PETSC_TRUE; 3967 ocoarse_size = pcbddc->coarse_size; 3968 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3969 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3970 /* see if we can avoid some work */ 3971 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3972 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3973 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3974 coarse_reuse = PETSC_FALSE; 3975 } else { /* we can safely reuse already computed coarse matrix */ 3976 coarse_reuse = PETSC_TRUE; 3977 } 3978 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3979 coarse_reuse = PETSC_FALSE; 3980 } 3981 /* reset any subassembling information */ 3982 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3983 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3984 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3985 coarse_reuse = PETSC_TRUE; 3986 } 3987 3988 /* count "active" (i.e. with positive local size) and "void" processes */ 3989 im_active = !!(pcis->n); 3990 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3991 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3992 void_procs = all_procs-active_procs; 3993 csin_type_simple = PETSC_TRUE; 3994 redist = PETSC_FALSE; 3995 if (pcbddc->current_level && void_procs) { 3996 csin_ml = PETSC_TRUE; 3997 ncoarse_ml = void_procs; 3998 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3999 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4000 csin_ds = PETSC_TRUE; 4001 ncoarse_ds = pcbddc->redistribute_coarse; 4002 redist = PETSC_TRUE; 4003 } else { 4004 csin_ds = PETSC_TRUE; 4005 ncoarse_ds = active_procs; 4006 redist = PETSC_TRUE; 4007 } 4008 } else { 4009 csin_ml = PETSC_FALSE; 4010 ncoarse_ml = all_procs; 4011 if (void_procs) { 4012 csin_ds = PETSC_TRUE; 4013 ncoarse_ds = void_procs; 4014 csin_type_simple = PETSC_FALSE; 4015 } else { 4016 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4017 csin_ds = PETSC_TRUE; 4018 ncoarse_ds = pcbddc->redistribute_coarse; 4019 redist = PETSC_TRUE; 4020 } else { 4021 csin_ds = PETSC_FALSE; 4022 ncoarse_ds = all_procs; 4023 } 4024 } 4025 } 4026 4027 /* 4028 test if we can go multilevel: three conditions must be satisfied: 4029 - we have not exceeded the number of levels requested 4030 - we can actually subassemble the active processes 4031 - we can find a suitable number of MPI processes where we can place the subassembled problem 4032 */ 4033 multilevel_allowed = PETSC_FALSE; 4034 multilevel_requested = PETSC_FALSE; 4035 if (pcbddc->current_level < pcbddc->max_levels) { 4036 multilevel_requested = PETSC_TRUE; 4037 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4038 multilevel_allowed = PETSC_FALSE; 4039 } else { 4040 multilevel_allowed = PETSC_TRUE; 4041 } 4042 } 4043 /* determine number of process partecipating to coarse solver */ 4044 if (multilevel_allowed) { 4045 ncoarse = ncoarse_ml; 4046 csin = csin_ml; 4047 redist = PETSC_FALSE; 4048 } else { 4049 ncoarse = ncoarse_ds; 4050 csin = csin_ds; 4051 } 4052 4053 /* creates temporary l2gmap and IS for coarse indexes */ 4054 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4055 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4056 4057 /* creates temporary MATIS object for coarse matrix */ 4058 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4059 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4060 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4061 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4062 #if 0 4063 { 4064 PetscViewer viewer; 4065 char filename[256]; 4066 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4067 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4068 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4069 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4070 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4071 } 4072 #endif 4073 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4074 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4075 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4076 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4077 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4078 4079 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4080 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4081 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4082 const PetscInt *idxs; 4083 ISLocalToGlobalMapping tmap; 4084 4085 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4086 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4087 /* allocate space for temporary storage */ 4088 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4089 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4090 /* allocate for IS array */ 4091 nisdofs = pcbddc->n_ISForDofsLocal; 4092 nisneu = !!pcbddc->NeumannBoundariesLocal; 4093 nis = nisdofs + nisneu; 4094 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4095 /* dofs splitting */ 4096 for (i=0;i<nisdofs;i++) { 4097 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4098 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4099 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4100 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4101 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4102 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4104 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4105 } 4106 /* neumann boundaries */ 4107 if (pcbddc->NeumannBoundariesLocal) { 4108 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4109 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4110 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4111 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4112 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4113 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4114 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4115 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4116 } 4117 /* free memory */ 4118 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4119 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4120 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4121 } else { 4122 nis = 0; 4123 nisdofs = 0; 4124 nisneu = 0; 4125 isarray = NULL; 4126 } 4127 /* destroy no longer needed map */ 4128 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4129 4130 /* restrict on coarse candidates (if needed) */ 4131 coarse_mat_is = NULL; 4132 if (csin) { 4133 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4134 if (redist) { 4135 PetscMPIInt rank; 4136 PetscInt spc,n_spc_p1,dest[1],destsize; 4137 4138 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4139 spc = active_procs/ncoarse; 4140 n_spc_p1 = active_procs%ncoarse; 4141 if (im_active) { 4142 destsize = 1; 4143 if (rank > n_spc_p1*(spc+1)-1) { 4144 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4145 } else { 4146 dest[0] = rank/(spc+1); 4147 } 4148 } else { 4149 destsize = 0; 4150 } 4151 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4152 } else if (csin_type_simple) { 4153 PetscMPIInt rank; 4154 PetscInt issize,isidx; 4155 4156 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4157 if (im_active) { 4158 issize = 1; 4159 isidx = (PetscInt)rank; 4160 } else { 4161 issize = 0; 4162 isidx = -1; 4163 } 4164 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4165 } else { /* get a suitable subassembling pattern from MATIS code */ 4166 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4167 } 4168 4169 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4170 if (!redist || ncoarse <= void_procs) { 4171 PetscInt ncoarse_cand,tissize,*nisindices; 4172 PetscInt *coarse_candidates; 4173 const PetscInt* tisindices; 4174 4175 /* get coarse candidates' ranks in pc communicator */ 4176 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4177 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4178 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4179 if (!coarse_candidates[i]) { 4180 coarse_candidates[ncoarse_cand++]=i; 4181 } 4182 } 4183 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4184 4185 4186 if (pcbddc->dbg_flag) { 4187 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4188 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4189 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4190 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4191 for (i=0;i<ncoarse_cand;i++) { 4192 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4193 } 4194 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4195 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4196 } 4197 /* shift the pattern on coarse candidates */ 4198 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4199 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4200 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4201 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4202 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4203 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4204 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4205 } 4206 if (pcbddc->dbg_flag) { 4207 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4208 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4209 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4210 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4211 } 4212 } 4213 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4214 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4215 } else { 4216 if (pcbddc->dbg_flag) { 4217 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4218 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4219 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4220 } 4221 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4222 coarse_mat_is = t_coarse_mat_is; 4223 } 4224 4225 /* create local to global scatters for coarse problem */ 4226 if (compute_vecs) { 4227 PetscInt lrows; 4228 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4229 if (coarse_mat_is) { 4230 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4231 } else { 4232 lrows = 0; 4233 } 4234 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4235 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4236 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4237 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4238 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4239 } 4240 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4241 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4242 4243 /* set defaults for coarse KSP and PC */ 4244 if (multilevel_allowed) { 4245 coarse_ksp_type = KSPRICHARDSON; 4246 coarse_pc_type = PCBDDC; 4247 } else { 4248 coarse_ksp_type = KSPPREONLY; 4249 coarse_pc_type = PCREDUNDANT; 4250 } 4251 4252 /* print some info if requested */ 4253 if (pcbddc->dbg_flag) { 4254 if (!multilevel_allowed) { 4255 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4256 if (multilevel_requested) { 4257 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4258 } else if (pcbddc->max_levels) { 4259 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4260 } 4261 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4262 } 4263 } 4264 4265 /* create the coarse KSP object only once with defaults */ 4266 if (coarse_mat_is) { 4267 MatReuse coarse_mat_reuse; 4268 PetscViewer dbg_viewer = NULL; 4269 if (pcbddc->dbg_flag) { 4270 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4271 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4272 } 4273 if (!pcbddc->coarse_ksp) { 4274 char prefix[256],str_level[16]; 4275 size_t len; 4276 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4277 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4278 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4279 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4280 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4281 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4282 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4283 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4284 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4285 /* prefix */ 4286 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4287 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4288 if (!pcbddc->current_level) { 4289 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4290 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4291 } else { 4292 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4293 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4294 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4295 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4296 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4297 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4298 } 4299 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4300 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4301 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4302 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4303 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4304 /* allow user customization */ 4305 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4306 } 4307 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4308 if (nisdofs) { 4309 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4310 for (i=0;i<nisdofs;i++) { 4311 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4312 } 4313 } 4314 if (nisneu) { 4315 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4316 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4317 } 4318 4319 /* get some info after set from options */ 4320 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4321 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4322 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4323 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4324 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4325 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4326 isbddc = PETSC_FALSE; 4327 } 4328 if (isredundant) { 4329 KSP inner_ksp; 4330 PC inner_pc; 4331 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4332 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4333 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4334 } 4335 4336 /* assemble coarse matrix */ 4337 if (coarse_reuse) { 4338 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4339 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4340 coarse_mat_reuse = MAT_REUSE_MATRIX; 4341 } else { 4342 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4343 } 4344 if (isbddc || isnn) { 4345 if (pcbddc->coarsening_ratio > 1) { 4346 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4347 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4348 if (pcbddc->dbg_flag) { 4349 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4350 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4351 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4352 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4353 } 4354 } 4355 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4356 } else { 4357 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4358 coarse_mat = coarse_mat_is; 4359 } 4360 } else { 4361 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4362 } 4363 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4364 4365 /* propagate symmetry info of coarse matrix */ 4366 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4367 if (pc->pmat->symmetric_set) { 4368 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4369 } 4370 if (pc->pmat->hermitian_set) { 4371 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4372 } 4373 if (pc->pmat->spd_set) { 4374 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4375 } 4376 /* set operators */ 4377 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4378 if (pcbddc->dbg_flag) { 4379 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4380 } 4381 } else { /* processes non partecipating to coarse solver (if any) */ 4382 coarse_mat = 0; 4383 } 4384 ierr = PetscFree(isarray);CHKERRQ(ierr); 4385 #if 0 4386 { 4387 PetscViewer viewer; 4388 char filename[256]; 4389 sprintf(filename,"coarse_mat.m"); 4390 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4391 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4392 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4393 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4394 } 4395 #endif 4396 4397 /* Compute coarse null space (special handling by BDDC only) */ 4398 if (pcbddc->NullSpace) { 4399 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4400 } 4401 4402 if (pcbddc->coarse_ksp) { 4403 Vec crhs,csol; 4404 PetscBool ispreonly; 4405 if (CoarseNullSpace) { 4406 if (isbddc) { 4407 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4408 } else { 4409 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4410 } 4411 } 4412 /* setup coarse ksp */ 4413 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4414 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4415 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4416 /* hack */ 4417 if (!csol) { 4418 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4419 } 4420 if (!crhs) { 4421 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4422 } 4423 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4424 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4425 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4426 KSP check_ksp; 4427 KSPType check_ksp_type; 4428 PC check_pc; 4429 Vec check_vec,coarse_vec; 4430 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4431 PetscInt its; 4432 PetscBool compute_eigs; 4433 PetscReal *eigs_r,*eigs_c; 4434 PetscInt neigs; 4435 const char *prefix; 4436 4437 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4438 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4439 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4440 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4441 if (ispreonly) { 4442 check_ksp_type = KSPPREONLY; 4443 compute_eigs = PETSC_FALSE; 4444 } else { 4445 check_ksp_type = KSPGMRES; 4446 compute_eigs = PETSC_TRUE; 4447 } 4448 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4449 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4450 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4451 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4452 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4453 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4454 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4455 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4456 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4457 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4458 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4459 /* create random vec */ 4460 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4461 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4462 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4463 if (CoarseNullSpace) { 4464 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4465 } 4466 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4467 /* solve coarse problem */ 4468 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4469 if (CoarseNullSpace) { 4470 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4471 } 4472 /* set eigenvalue estimation if preonly has not been requested */ 4473 if (compute_eigs) { 4474 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4475 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4476 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4477 lambda_max = eigs_r[neigs-1]; 4478 lambda_min = eigs_r[0]; 4479 if (pcbddc->use_coarse_estimates) { 4480 if (lambda_max>lambda_min) { 4481 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4482 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4483 } 4484 } 4485 } 4486 4487 /* check coarse problem residual error */ 4488 if (pcbddc->dbg_flag) { 4489 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4490 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4491 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4492 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4493 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4494 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4495 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4496 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4497 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4498 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4499 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4500 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4501 if (compute_eigs) { 4502 PetscReal lambda_max_s,lambda_min_s; 4503 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4504 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4505 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4506 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4507 for (i=0;i<neigs;i++) { 4508 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4509 } 4510 } 4511 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4512 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4513 } 4514 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4515 if (compute_eigs) { 4516 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4517 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4518 } 4519 } 4520 } 4521 /* print additional info */ 4522 if (pcbddc->dbg_flag) { 4523 /* waits until all processes reaches this point */ 4524 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4525 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4526 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4527 } 4528 4529 /* free memory */ 4530 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4531 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4532 PetscFunctionReturn(0); 4533 } 4534 4535 #undef __FUNCT__ 4536 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4537 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4538 { 4539 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4540 PC_IS* pcis = (PC_IS*)pc->data; 4541 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4542 PetscInt i,local_size,coarse_size=0; 4543 PetscInt *local_primal_indices=NULL; 4544 PetscInt *t_local_primal_indices=NULL; 4545 PetscErrorCode ierr; 4546 4547 PetscFunctionBegin; 4548 /* Compute global number of coarse dofs */ 4549 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4550 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4551 } 4552 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr); 4553 ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr); 4554 local_size = 0; 4555 for (i=0;i<pcbddc->local_primal_size_cc;i++) { 4556 PetscInt j; 4557 for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j; 4558 } 4559 ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr); 4560 if (local_size != pcbddc->local_primal_size) { 4561 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4562 } 4563 4564 /* check numbering */ 4565 if (pcbddc->dbg_flag) { 4566 PetscScalar coarsesum,*array; 4567 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4568 4569 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4570 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4571 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4572 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4573 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4574 for (i=0;i<pcbddc->local_primal_size;i++) { 4575 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4576 } 4577 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4578 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4579 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4580 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4581 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4582 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4583 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4584 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4585 for (i=0;i<pcis->n;i++) { 4586 if (array[i] == 1.0) { 4587 set_error = PETSC_TRUE; 4588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4589 } 4590 } 4591 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4592 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4593 for (i=0;i<pcis->n;i++) { 4594 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4595 } 4596 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4597 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4598 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4599 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4600 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4601 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4602 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4603 PetscInt *gidxs; 4604 4605 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4606 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4607 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4608 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4609 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4610 for (i=0;i<pcbddc->local_primal_size;i++) { 4611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4612 } 4613 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4614 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4615 } 4616 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4617 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4618 } 4619 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4620 /* get back data */ 4621 *coarse_size_n = coarse_size; 4622 *local_primal_indices_n = local_primal_indices; 4623 PetscFunctionReturn(0); 4624 } 4625 4626 #undef __FUNCT__ 4627 #define __FUNCT__ "PCBDDCGlobalToLocal" 4628 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4629 { 4630 IS localis_t; 4631 PetscInt i,lsize,*idxs,n; 4632 PetscScalar *vals; 4633 PetscErrorCode ierr; 4634 4635 PetscFunctionBegin; 4636 /* get indices in local ordering exploiting local to global map */ 4637 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4638 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4639 for (i=0;i<lsize;i++) vals[i] = 1.0; 4640 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4641 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4642 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4643 if (idxs) { /* multilevel guard */ 4644 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4645 } 4646 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4647 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4648 ierr = PetscFree(vals);CHKERRQ(ierr); 4649 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4650 /* now compute set in local ordering */ 4651 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4652 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4653 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4654 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4655 for (i=0,lsize=0;i<n;i++) { 4656 if (PetscRealPart(vals[i]) > 0.5) { 4657 lsize++; 4658 } 4659 } 4660 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4661 for (i=0,lsize=0;i<n;i++) { 4662 if (PetscRealPart(vals[i]) > 0.5) { 4663 idxs[lsize++] = i; 4664 } 4665 } 4666 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4667 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4668 *localis = localis_t; 4669 PetscFunctionReturn(0); 4670 } 4671 4672 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4673 #undef __FUNCT__ 4674 #define __FUNCT__ "PCBDDCMatMult_Private" 4675 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4676 { 4677 PCBDDCChange_ctx change_ctx; 4678 PetscErrorCode ierr; 4679 4680 PetscFunctionBegin; 4681 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4682 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4683 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4684 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4685 PetscFunctionReturn(0); 4686 } 4687 4688 #undef __FUNCT__ 4689 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4690 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4691 { 4692 PCBDDCChange_ctx change_ctx; 4693 PetscErrorCode ierr; 4694 4695 PetscFunctionBegin; 4696 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4697 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4698 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4699 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4700 PetscFunctionReturn(0); 4701 } 4702 4703 #undef __FUNCT__ 4704 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4705 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4706 { 4707 PC_IS *pcis=(PC_IS*)pc->data; 4708 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4709 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4710 Mat S_j; 4711 PetscInt *used_xadj,*used_adjncy; 4712 PetscBool free_used_adj; 4713 PetscErrorCode ierr; 4714 4715 PetscFunctionBegin; 4716 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4717 free_used_adj = PETSC_FALSE; 4718 if (pcbddc->sub_schurs_layers == -1) { 4719 used_xadj = NULL; 4720 used_adjncy = NULL; 4721 } else { 4722 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4723 used_xadj = pcbddc->mat_graph->xadj; 4724 used_adjncy = pcbddc->mat_graph->adjncy; 4725 } else if (pcbddc->computed_rowadj) { 4726 used_xadj = pcbddc->mat_graph->xadj; 4727 used_adjncy = pcbddc->mat_graph->adjncy; 4728 } else { 4729 PetscBool flg_row=PETSC_FALSE; 4730 const PetscInt *xadj,*adjncy; 4731 PetscInt nvtxs; 4732 4733 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4734 if (flg_row) { 4735 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4736 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4737 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4738 free_used_adj = PETSC_TRUE; 4739 } else { 4740 pcbddc->sub_schurs_layers = -1; 4741 used_xadj = NULL; 4742 used_adjncy = NULL; 4743 } 4744 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4745 } 4746 } 4747 4748 /* setup sub_schurs data */ 4749 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4750 if (!sub_schurs->use_mumps) { 4751 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4752 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4753 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4754 } else { 4755 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4756 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4757 } 4758 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4759 4760 /* free adjacency */ 4761 if (free_used_adj) { 4762 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4763 } 4764 PetscFunctionReturn(0); 4765 } 4766 4767 #undef __FUNCT__ 4768 #define __FUNCT__ "PCBDDCInitSubSchurs" 4769 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4770 { 4771 PC_IS *pcis=(PC_IS*)pc->data; 4772 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4773 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4774 PCBDDCGraph graph; 4775 PetscErrorCode ierr; 4776 4777 PetscFunctionBegin; 4778 /* attach interface graph for determining subsets */ 4779 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4780 IS verticesIS,verticescomm; 4781 PetscInt vsize,*idxs; 4782 4783 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4784 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4785 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4786 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4787 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4788 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4789 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4790 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4791 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4792 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4793 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4794 /* 4795 if (pcbddc->dbg_flag) { 4796 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4797 } 4798 */ 4799 } else { 4800 graph = pcbddc->mat_graph; 4801 } 4802 4803 /* sub_schurs init */ 4804 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4805 4806 /* free graph struct */ 4807 if (pcbddc->sub_schurs_rebuild) { 4808 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4809 } 4810 PetscFunctionReturn(0); 4811 } 4812