1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 60 nmax = PetscMax(nmin,nmax); 61 allocated_S_St = PETSC_FALSE; 62 if (nmin) { 63 allocated_S_St = PETSC_TRUE; 64 } 65 66 /* allocate lapack workspace */ 67 cum = cum2 = 0; 68 maxneigs = 0; 69 for (i=0;i<sub_schurs->n_subs;i++) { 70 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 71 PetscInt n,subset_size; 72 73 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 74 n = PetscMin(subset_size,nmax); 75 cum += subset_size; 76 cum2 += subset_size*n; 77 maxneigs = PetscMax(maxneigs,n); 78 } 79 } 80 if (mss) { 81 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 82 PetscBLASInt B_itype = 1; 83 PetscBLASInt B_N = mss; 84 PetscReal zero = 0.0; 85 PetscReal eps = 0.0; /* dlamch? */ 86 87 B_lwork = -1; 88 S = NULL; 89 St = NULL; 90 eigs = NULL; 91 eigv = NULL; 92 B_iwork = NULL; 93 B_ifail = NULL; 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 PetscInt j,subset_size; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 158 const PetscInt *idxs; 159 PetscReal infty = PETSC_MAX_REAL; 160 PetscInt eigs_start = 0; 161 PetscBLASInt B_N; 162 PetscBool same_data = PETSC_FALSE; 163 164 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 165 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 166 if (sub_schurs->is_hermitian) { 167 PetscInt j,k; 168 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 169 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 170 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 171 } 172 for (j=0;j<subset_size;j++) { 173 for (k=j;k<subset_size;k++) { 174 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 175 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 176 } 177 } 178 } else { 179 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 180 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 181 } 182 } else { 183 S = Sarray + cumarray; 184 St = Starray + cumarray; 185 } 186 187 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 188 /* see if we can save some work */ 189 if (sub_schurs->n_subs == 1) { 190 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 191 } 192 193 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 194 B_neigs = 0; 195 } else { 196 /* Threshold: this is an heuristic for edges */ 197 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 198 199 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 200 PetscBLASInt B_itype = 1; 201 PetscBLASInt B_IL, B_IU; 202 PetscReal eps = -1.0; /* dlamch? */ 203 PetscInt nmin_s; 204 205 /* ask for eigenvalues larger than thresh */ 206 if (pcbddc->dbg_flag) { 207 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 208 } 209 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 210 #if defined(PETSC_USE_COMPLEX) 211 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 212 #else 213 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 214 #endif 215 ierr = PetscFPTrapPop();CHKERRQ(ierr); 216 if (B_ierr) { 217 if (B_ierr < 0 ) { 218 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 219 } else if (B_ierr <= B_N) { 220 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 221 } else { 222 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 223 } 224 } 225 226 if (B_neigs > nmax) { 227 if (pcbddc->dbg_flag) { 228 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 229 } 230 eigs_start = B_neigs -nmax; 231 B_neigs = nmax; 232 } 233 234 nmin_s = PetscMin(nmin,B_N); 235 if (B_neigs < nmin_s) { 236 PetscBLASInt B_neigs2; 237 238 B_IU = B_N - B_neigs; 239 B_IL = B_N - nmin_s + 1; 240 if (pcbddc->dbg_flag) { 241 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 242 } 243 if (sub_schurs->is_hermitian) { 244 PetscInt j; 245 for (j=0;j<subset_size;j++) { 246 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 247 } 248 for (j=0;j<subset_size;j++) { 249 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 250 } 251 } else { 252 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 254 } 255 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 256 #if defined(PETSC_USE_COMPLEX) 257 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 258 #else 259 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 260 #endif 261 ierr = PetscFPTrapPop();CHKERRQ(ierr); 262 B_neigs += B_neigs2; 263 } 264 if (B_ierr) { 265 if (B_ierr < 0 ) { 266 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 267 } else if (B_ierr <= B_N) { 268 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 269 } else { 270 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 271 } 272 } 273 if (pcbddc->dbg_flag) { 274 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 275 for (j=0;j<B_neigs;j++) { 276 if (eigs[j] == 0.0) { 277 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 278 } else { 279 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 280 } 281 } 282 } 283 } else { 284 /* TODO */ 285 } 286 } 287 maxneigs = PetscMax(B_neigs,maxneigs); 288 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 289 if (B_neigs) { 290 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 291 292 if (pcbddc->dbg_flag > 1) { 293 PetscInt ii; 294 for (ii=0;ii<B_neigs;ii++) { 295 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 296 for (j=0;j<B_N;j++) { 297 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 298 } 299 } 300 } 301 #if 0 302 for (j=0;j<B_neigs;j++) { 303 PetscBLASInt Blas_N,Blas_one = 1.0; 304 PetscScalar norm; 305 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 306 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 307 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 308 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 309 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 310 } else { 311 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 312 } 313 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 314 } 315 #endif 316 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 317 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 318 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 319 cum++; 320 } 321 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 322 } 323 /* shift for next computation */ 324 cumarray += subset_size*subset_size; 325 } 326 if (pcbddc->dbg_flag) { 327 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 328 } 329 330 if (mss) { 331 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 332 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 333 /* destroy matrices (junk) */ 334 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 335 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 336 } 337 if (allocated_S_St) { 338 ierr = PetscFree2(S,St);CHKERRQ(ierr); 339 } 340 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 341 #if defined(PETSC_USE_COMPLEX) 342 ierr = PetscFree(rwork);CHKERRQ(ierr); 343 #endif 344 if (pcbddc->dbg_flag) { 345 PetscInt maxneigs_r; 346 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 347 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 348 } 349 PetscFunctionReturn(0); 350 } 351 352 #undef __FUNCT__ 353 #define __FUNCT__ "PCBDDCSetUpSolvers" 354 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 355 { 356 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 357 PetscScalar *coarse_submat_vals; 358 PetscErrorCode ierr; 359 360 PetscFunctionBegin; 361 /* Setup local scatters R_to_B and (optionally) R_to_D */ 362 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 363 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 364 365 /* Setup local neumann solver ksp_R */ 366 /* PCBDDCSetUpLocalScatters should be called first! */ 367 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 368 369 /* Change global null space passed in by the user if change of basis has been requested */ 370 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 371 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 372 } 373 374 /* 375 Setup local correction and local part of coarse basis. 376 Gives back the dense local part of the coarse matrix in column major ordering 377 */ 378 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 379 380 /* Compute total number of coarse nodes and setup coarse solver */ 381 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 382 383 /* free */ 384 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCResetCustomization" 390 PetscErrorCode PCBDDCResetCustomization(PC pc) 391 { 392 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 397 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 398 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 399 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 401 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 402 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 403 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 404 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 405 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 406 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 407 PetscFunctionReturn(0); 408 } 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "PCBDDCResetTopography" 412 PetscErrorCode PCBDDCResetTopography(PC pc) 413 { 414 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 419 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 421 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 422 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "PCBDDCResetSolvers" 428 PetscErrorCode PCBDDCResetSolvers(PC pc) 429 { 430 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 431 PetscScalar *array; 432 PetscErrorCode ierr; 433 434 PetscFunctionBegin; 435 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 436 if (pcbddc->coarse_phi_B) { 437 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 438 ierr = PetscFree(array);CHKERRQ(ierr); 439 } 440 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 441 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 442 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 443 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 444 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 445 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 446 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 447 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 448 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 449 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 450 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 451 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 452 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 453 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 454 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 455 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 456 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 457 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 458 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 459 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 460 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 461 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 462 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 468 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 469 { 470 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 471 PC_IS *pcis = (PC_IS*)pc->data; 472 VecType impVecType; 473 PetscInt n_constraints,n_R,old_size; 474 PetscErrorCode ierr; 475 476 PetscFunctionBegin; 477 if (!pcbddc->ConstraintMatrix) { 478 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 479 } 480 /* get sizes */ 481 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 482 n_R = pcis->n-pcbddc->n_vertices; 483 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 484 /* local work vectors (try to avoid unneeded work)*/ 485 /* R nodes */ 486 old_size = -1; 487 if (pcbddc->vec1_R) { 488 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 489 } 490 if (n_R != old_size) { 491 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 492 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 493 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 494 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 495 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 496 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 497 } 498 /* local primal dofs */ 499 old_size = -1; 500 if (pcbddc->vec1_P) { 501 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 502 } 503 if (pcbddc->local_primal_size != old_size) { 504 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 505 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 506 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 507 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 508 } 509 /* local explicit constraints */ 510 old_size = -1; 511 if (pcbddc->vec1_C) { 512 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 513 } 514 if (n_constraints && n_constraints != old_size) { 515 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 516 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 517 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 518 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 519 } 520 PetscFunctionReturn(0); 521 } 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "PCBDDCSetUpCorrection" 525 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 526 { 527 PetscErrorCode ierr; 528 /* pointers to pcis and pcbddc */ 529 PC_IS* pcis = (PC_IS*)pc->data; 530 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 531 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 532 /* submatrices of local problem */ 533 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 534 /* submatrices of local coarse problem */ 535 Mat S_VV,S_CV,S_VC,S_CC; 536 /* working matrices */ 537 Mat C_CR; 538 /* additional working stuff */ 539 PC pc_R; 540 Mat F; 541 PetscBool isLU,isCHOL,isILU; 542 543 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 544 PetscScalar *work; 545 PetscInt *idx_V_B; 546 PetscInt n,n_vertices,n_constraints; 547 PetscInt i,n_R,n_D,n_B; 548 PetscBool unsymmetric_check; 549 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 550 MatType impMatType; 551 /* some shortcuts to scalars */ 552 PetscScalar one=1.0,m_one=-1.0; 553 554 PetscFunctionBegin; 555 n_vertices = pcbddc->n_vertices; 556 n_constraints = pcbddc->local_primal_size-n_vertices; 557 /* Set Non-overlapping dimensions */ 558 n_B = pcis->n_B; 559 n_D = pcis->n - n_B; 560 n_R = pcis->n - n_vertices; 561 562 /* Set types for local objects needed by BDDC precondtioner */ 563 impMatType = MATSEQDENSE; 564 565 /* vertices in boundary numbering */ 566 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 567 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 568 if (i != n_vertices) { 569 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 570 } 571 572 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 573 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 574 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 575 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 576 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 577 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 578 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 579 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 580 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 581 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 582 583 unsymmetric_check = PETSC_FALSE; 584 /* allocate workspace */ 585 n = 0; 586 if (n_constraints) { 587 n += n_R*n_constraints; 588 } 589 if (n_vertices) { 590 n = PetscMax(2*n_R*n_vertices,n); 591 n = PetscMax((n_R+n_B)*n_vertices,n); 592 } 593 if (!pcbddc->symmetric_primal) { 594 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 595 unsymmetric_check = PETSC_TRUE; 596 } 597 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 598 599 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 600 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 601 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 602 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 603 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 604 if (isLU || isILU || isCHOL) { 605 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 606 } else if (sub_schurs->reuse_mumps) { 607 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 608 MatFactorType type; 609 610 F = reuse_mumps->F; 611 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 612 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 613 } else { 614 F = NULL; 615 } 616 617 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 618 if (n_constraints) { 619 Mat M1,M2,M3; 620 Mat auxmat; 621 IS is_aux; 622 PetscScalar *array,*array2; 623 624 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 625 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 626 627 /* Extract constraints on R nodes: C_{CR} */ 628 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 629 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 630 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 631 632 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 633 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 634 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 635 for (i=0;i<n_constraints;i++) { 636 const PetscScalar *row_cmat_values; 637 const PetscInt *row_cmat_indices; 638 PetscInt size_of_constraint,j; 639 640 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 641 for (j=0;j<size_of_constraint;j++) { 642 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 643 } 644 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 645 } 646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 647 if (F) { 648 Mat B; 649 650 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 651 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 652 ierr = MatDestroy(&B);CHKERRQ(ierr); 653 } else { 654 PetscScalar *marr; 655 656 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 657 for (i=0;i<n_constraints;i++) { 658 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 659 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 660 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 661 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 662 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 663 } 664 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 665 } 666 if (!pcbddc->switch_static) { 667 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 668 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 669 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 670 for (i=0;i<n_constraints;i++) { 671 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 672 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 673 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 674 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 675 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 676 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 677 } 678 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 679 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 680 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 681 } else { 682 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 683 pcbddc->local_auxmat2 = local_auxmat2_R; 684 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 685 } 686 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 687 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 688 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 689 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 690 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 691 if (isCHOL) { 692 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 693 } else { 694 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 695 } 696 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 697 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 698 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 699 ierr = MatDestroy(&M2);CHKERRQ(ierr); 700 ierr = MatDestroy(&M3);CHKERRQ(ierr); 701 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 702 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 703 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 704 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 705 ierr = MatDestroy(&M1);CHKERRQ(ierr); 706 } 707 /* Get submatrices from subdomain matrix */ 708 if (n_vertices) { 709 Mat newmat; 710 IS is_aux; 711 PetscInt ibs,mbs; 712 PetscBool issbaij; 713 714 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 715 IS tis; 716 717 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 718 ierr = ISSort(tis);CHKERRQ(ierr); 719 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 720 ierr = ISDestroy(&tis);CHKERRQ(ierr); 721 } else { 722 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 723 } 724 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 725 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 726 if (ibs != mbs) { /* need to convert to SEQAIJ */ 727 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 728 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 729 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 730 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 731 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 732 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 733 } else { 734 /* this is safe */ 735 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 736 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 737 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 738 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 739 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 740 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 741 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 742 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 743 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 744 } else { 745 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 746 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 747 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 748 } 749 /* need to convert to SeqAIJ for MatMatMult with dense matrix */ 750 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 751 } 752 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 753 } 754 755 /* Matrix of coarse basis functions (local) */ 756 if (pcbddc->coarse_phi_B) { 757 PetscInt on_B,on_primal,on_D=n_D; 758 if (pcbddc->coarse_phi_D) { 759 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 760 } 761 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 762 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 763 PetscScalar *marray; 764 765 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 766 ierr = PetscFree(marray);CHKERRQ(ierr); 767 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 768 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 769 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 770 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 771 } 772 } 773 774 if (!pcbddc->coarse_phi_B) { 775 PetscScalar *marray; 776 777 n = n_B*pcbddc->local_primal_size; 778 if (pcbddc->switch_static || pcbddc->dbg_flag) { 779 n += n_D*pcbddc->local_primal_size; 780 } 781 if (!pcbddc->symmetric_primal) { 782 n *= 2; 783 } 784 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 785 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 786 n = n_B*pcbddc->local_primal_size; 787 if (pcbddc->switch_static || pcbddc->dbg_flag) { 788 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 789 n += n_D*pcbddc->local_primal_size; 790 } 791 if (!pcbddc->symmetric_primal) { 792 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 793 if (pcbddc->switch_static || pcbddc->dbg_flag) { 794 n = n_B*pcbddc->local_primal_size; 795 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 796 } 797 } else { 798 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 799 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 800 if (pcbddc->switch_static || pcbddc->dbg_flag) { 801 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 802 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 803 } 804 } 805 } 806 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 807 /* vertices */ 808 if (n_vertices) { 809 810 if (n_R) { 811 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 812 PetscBLASInt B_N,B_one = 1; 813 PetscScalar *x,*y; 814 815 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 816 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 817 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 818 if (F) { /* TODO could be optimized for symmetric problems */ 819 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 820 } else { 821 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 822 for (i=0;i<n_vertices;i++) { 823 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 824 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 825 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 826 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 827 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 828 } 829 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 830 } 831 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 832 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 833 if (n_constraints) { 834 Mat B; 835 836 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 837 for (i=0;i<n_vertices;i++) { 838 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 839 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 840 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 841 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 843 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 844 } 845 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 846 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 847 ierr = MatDestroy(&B);CHKERRQ(ierr); 848 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 849 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 850 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 851 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 852 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 853 ierr = MatDestroy(&B);CHKERRQ(ierr); 854 } 855 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 856 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 857 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 858 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 859 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 860 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 861 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 862 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 863 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 864 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 865 } else { 866 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 867 } 868 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 869 /* coarse basis functions */ 870 for (i=0;i<n_vertices;i++) { 871 PetscScalar *y; 872 873 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 874 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 875 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 876 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 877 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 878 y[n_B*i+idx_V_B[i]] = 1.0; 879 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 880 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 881 882 if (pcbddc->switch_static || pcbddc->dbg_flag) { 883 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 884 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 885 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 888 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 889 } 890 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 891 } 892 } 893 894 if (n_constraints) { 895 Mat B; 896 897 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 898 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 899 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 900 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 901 if (n_vertices) { 902 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 903 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 904 } else { 905 Mat S_VCt; 906 907 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 908 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 909 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 910 } 911 } 912 ierr = MatDestroy(&B);CHKERRQ(ierr); 913 /* coarse basis functions */ 914 for (i=0;i<n_constraints;i++) { 915 PetscScalar *y; 916 917 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 918 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 919 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 920 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 921 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 923 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 924 if (pcbddc->switch_static || pcbddc->dbg_flag) { 925 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 926 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 927 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 929 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 930 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 931 } 932 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 933 } 934 } 935 if (n_constraints) { 936 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 937 } 938 939 /* compute other basis functions for non-symmetric problems */ 940 if (!pcbddc->symmetric_primal) { 941 942 if (n_constraints) { 943 Mat S_CCT,B_C; 944 945 /* this is a lazy thing */ 946 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 947 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 948 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 949 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 950 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 951 if (n_vertices) { 952 Mat B_V,S_VCT; 953 954 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 955 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 956 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 957 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 958 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 959 } 960 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 961 } else { /* if there are no constraints, reset work */ 962 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 963 } 964 if (n_vertices && n_R) { 965 Mat A_VRT; 966 PetscScalar *marray; 967 PetscBLASInt B_N,B_one = 1; 968 969 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 970 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 971 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 972 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 973 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 974 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 975 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 976 } 977 978 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 979 for (i=0;i<pcbddc->local_primal_size;i++) { 980 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 981 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 982 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 983 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 984 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 985 } 986 } else { 987 for (i=0;i<pcbddc->local_primal_size;i++) { 988 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 989 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 990 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 991 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 992 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 993 } 994 } 995 /* coarse basis functions */ 996 for (i=0;i<pcbddc->local_primal_size;i++) { 997 PetscScalar *y; 998 999 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1000 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1001 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1002 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1003 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004 if (i<n_vertices) { 1005 y[n_B*i+idx_V_B[i]] = 1.0; 1006 } 1007 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1008 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1009 1010 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1011 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1012 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1013 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1015 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1016 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1017 } 1018 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1019 } 1020 } 1021 /* free memory */ 1022 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1023 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1024 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1025 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1026 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1027 ierr = PetscFree(work);CHKERRQ(ierr); 1028 if (n_vertices) { 1029 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1030 } 1031 if (n_constraints) { 1032 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1033 } 1034 /* Checking coarse_sub_mat and coarse basis functios */ 1035 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1036 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1037 if (pcbddc->dbg_flag) { 1038 Mat coarse_sub_mat; 1039 Mat AUXMAT,TM1,TM2,TM3,TM4; 1040 Mat coarse_phi_D,coarse_phi_B; 1041 Mat coarse_psi_D,coarse_psi_B; 1042 Mat A_II,A_BB,A_IB,A_BI; 1043 Mat C_B,CPHI; 1044 IS is_dummy; 1045 Vec mones; 1046 MatType checkmattype=MATSEQAIJ; 1047 PetscReal real_value; 1048 1049 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1050 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1051 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1052 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1053 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1054 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1055 if (unsymmetric_check) { 1056 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1057 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1058 } 1059 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1060 1061 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1062 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1063 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1064 if (unsymmetric_check) { 1065 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1066 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1067 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1069 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1070 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1071 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1072 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1073 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1074 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1075 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1076 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1077 } else { 1078 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1079 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1080 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1081 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1082 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1083 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1084 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1085 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1086 } 1087 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1088 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1089 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1090 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1091 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1092 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1093 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1095 1096 /* check constraints */ 1097 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1098 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1099 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1100 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1101 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1102 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1103 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1105 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1106 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1107 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1108 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1109 1110 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1111 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1112 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1113 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1114 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1115 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1116 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1117 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1118 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1119 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1120 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1121 if (unsymmetric_check) { 1122 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1123 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1124 } 1125 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1126 } 1127 /* get back data */ 1128 *coarse_submat_vals_n = coarse_submat_vals; 1129 PetscFunctionReturn(0); 1130 } 1131 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1134 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1135 { 1136 Mat *work_mat; 1137 IS isrow_s,iscol_s; 1138 PetscBool rsorted,csorted; 1139 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1144 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1145 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1146 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1147 1148 if (!rsorted) { 1149 const PetscInt *idxs; 1150 PetscInt *idxs_sorted,i; 1151 1152 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1153 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1154 for (i=0;i<rsize;i++) { 1155 idxs_perm_r[i] = i; 1156 } 1157 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1158 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1159 for (i=0;i<rsize;i++) { 1160 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1161 } 1162 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1163 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1164 } else { 1165 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1166 isrow_s = isrow; 1167 } 1168 1169 if (!csorted) { 1170 if (isrow == iscol) { 1171 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1172 iscol_s = isrow_s; 1173 } else { 1174 const PetscInt *idxs; 1175 PetscInt *idxs_sorted,i; 1176 1177 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1178 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1179 for (i=0;i<csize;i++) { 1180 idxs_perm_c[i] = i; 1181 } 1182 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1183 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1184 for (i=0;i<csize;i++) { 1185 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1186 } 1187 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1188 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1189 } 1190 } else { 1191 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1192 iscol_s = iscol; 1193 } 1194 1195 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1196 1197 if (!rsorted || !csorted) { 1198 Mat new_mat; 1199 IS is_perm_r,is_perm_c; 1200 1201 if (!rsorted) { 1202 PetscInt *idxs_r,i; 1203 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1204 for (i=0;i<rsize;i++) { 1205 idxs_r[idxs_perm_r[i]] = i; 1206 } 1207 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1208 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1209 } else { 1210 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1211 } 1212 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1213 1214 if (!csorted) { 1215 if (isrow_s == iscol_s) { 1216 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1217 is_perm_c = is_perm_r; 1218 } else { 1219 PetscInt *idxs_c,i; 1220 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1221 for (i=0;i<csize;i++) { 1222 idxs_c[idxs_perm_c[i]] = i; 1223 } 1224 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1225 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1226 } 1227 } else { 1228 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1229 } 1230 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1231 1232 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1233 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1234 work_mat[0] = new_mat; 1235 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1236 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1237 } 1238 1239 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1240 *B = work_mat[0]; 1241 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1242 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1243 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1244 PetscFunctionReturn(0); 1245 } 1246 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1249 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1250 { 1251 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1252 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1253 Mat new_mat; 1254 IS is_local,is_global; 1255 PetscInt local_size; 1256 PetscBool isseqaij; 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1261 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1262 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1263 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1264 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1265 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1266 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1267 1268 /* check */ 1269 if (pcbddc->dbg_flag) { 1270 Vec x,x_change; 1271 PetscReal error; 1272 1273 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1274 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1275 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1276 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1277 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1278 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1279 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1280 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1281 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1282 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1283 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1284 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1285 ierr = VecDestroy(&x);CHKERRQ(ierr); 1286 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1287 } 1288 1289 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1290 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1291 if (isseqaij) { 1292 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1293 } else { 1294 Mat work_mat; 1295 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1296 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1297 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1298 } 1299 if (matis->A->symmetric_set) { 1300 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1301 #if !defined(PETSC_USE_COMPLEX) 1302 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1303 #endif 1304 } 1305 /* 1306 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1307 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1308 */ 1309 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1315 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1316 { 1317 PC_IS* pcis = (PC_IS*)(pc->data); 1318 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1319 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1320 PetscInt *idx_R_local=NULL; 1321 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1322 PetscInt vbs,bs; 1323 PetscBT bitmask=NULL; 1324 PetscErrorCode ierr; 1325 1326 PetscFunctionBegin; 1327 /* 1328 No need to setup local scatters if 1329 - primal space is unchanged 1330 AND 1331 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1332 AND 1333 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1334 */ 1335 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1336 PetscFunctionReturn(0); 1337 } 1338 /* destroy old objects */ 1339 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1340 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1341 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1342 /* Set Non-overlapping dimensions */ 1343 n_B = pcis->n_B; 1344 n_D = pcis->n - n_B; 1345 n_vertices = pcbddc->n_vertices; 1346 1347 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1348 1349 /* create auxiliary bitmask and allocate workspace */ 1350 if (!sub_schurs->reuse_mumps) { 1351 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1352 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1353 for (i=0;i<n_vertices;i++) { 1354 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1355 } 1356 1357 for (i=0, n_R=0; i<pcis->n; i++) { 1358 if (!PetscBTLookup(bitmask,i)) { 1359 idx_R_local[n_R++] = i; 1360 } 1361 } 1362 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1363 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1364 1365 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1366 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1367 } 1368 1369 /* Block code */ 1370 vbs = 1; 1371 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1372 if (bs>1 && !(n_vertices%bs)) { 1373 PetscBool is_blocked = PETSC_TRUE; 1374 PetscInt *vary; 1375 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1376 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1377 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1378 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1379 for (i=0; i<n_vertices/bs; i++) { 1380 if (vary[i]!=0 && vary[i]!=bs) { 1381 is_blocked = PETSC_FALSE; 1382 break; 1383 } 1384 } 1385 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1386 vbs = bs; 1387 for (i=0;i<n_R/vbs;i++) { 1388 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1389 } 1390 } 1391 ierr = PetscFree(vary);CHKERRQ(ierr); 1392 } 1393 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1394 if (sub_schurs->reuse_mumps) { 1395 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1396 1397 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1398 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1399 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1400 reuse_mumps->is_R = pcbddc->is_R_local; 1401 } else { 1402 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1403 } 1404 1405 /* print some info if requested */ 1406 if (pcbddc->dbg_flag) { 1407 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1408 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1409 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1413 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1414 } 1415 1416 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1417 if (!sub_schurs->reuse_mumps) { 1418 IS is_aux1,is_aux2; 1419 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1420 1421 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1422 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1423 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1424 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1425 for (i=0; i<n_D; i++) { 1426 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1427 } 1428 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1429 for (i=0, j=0; i<n_R; i++) { 1430 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1431 aux_array1[j++] = i; 1432 } 1433 } 1434 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1435 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1436 for (i=0, j=0; i<n_B; i++) { 1437 if (!PetscBTLookup(bitmask,is_indices[i])) { 1438 aux_array2[j++] = i; 1439 } 1440 } 1441 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1442 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1443 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1444 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1445 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1446 1447 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1448 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1449 for (i=0, j=0; i<n_R; i++) { 1450 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1451 aux_array1[j++] = i; 1452 } 1453 } 1454 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1455 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1456 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1457 } 1458 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1459 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1460 } else { 1461 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1462 IS tis; 1463 PetscInt schur_size; 1464 1465 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1466 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1467 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1468 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1469 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1470 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1471 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1472 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1473 } 1474 } 1475 PetscFunctionReturn(0); 1476 } 1477 1478 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1481 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1482 { 1483 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1484 PC_IS *pcis = (PC_IS*)pc->data; 1485 PC pc_temp; 1486 Mat A_RR; 1487 MatReuse reuse; 1488 PetscScalar m_one = -1.0; 1489 PetscReal value; 1490 PetscInt n_D,n_R,ibs,mbs; 1491 PetscBool use_exact,use_exact_reduced,issbaij; 1492 PetscErrorCode ierr; 1493 /* prefixes stuff */ 1494 char dir_prefix[256],neu_prefix[256],str_level[16]; 1495 size_t len; 1496 1497 PetscFunctionBegin; 1498 1499 /* compute prefixes */ 1500 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1501 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1502 if (!pcbddc->current_level) { 1503 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1504 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1505 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1506 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1507 } else { 1508 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1509 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1510 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1511 len -= 15; /* remove "pc_bddc_coarse_" */ 1512 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1513 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1514 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1515 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1516 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1517 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1518 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1519 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1520 } 1521 1522 /* DIRICHLET PROBLEM */ 1523 if (dirichlet) { 1524 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1525 if (pcbddc->local_mat->symmetric_set) { 1526 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1527 } 1528 /* Matrix for Dirichlet problem is pcis->A_II */ 1529 n_D = pcis->n - pcis->n_B; 1530 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1531 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1532 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1533 /* default */ 1534 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1535 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1536 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1537 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1538 if (issbaij) { 1539 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1540 } else { 1541 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1542 } 1543 /* Allow user's customization */ 1544 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1545 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1546 } 1547 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1548 if (sub_schurs->reuse_mumps) { 1549 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1550 1551 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1552 } 1553 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1554 if (!n_D) { 1555 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1556 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1557 } 1558 /* Set Up KSP for Dirichlet problem of BDDC */ 1559 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1560 /* set ksp_D into pcis data */ 1561 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1562 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1563 pcis->ksp_D = pcbddc->ksp_D; 1564 } 1565 1566 /* NEUMANN PROBLEM */ 1567 A_RR = 0; 1568 if (neumann) { 1569 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1570 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1571 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1572 if (pcbddc->ksp_R) { /* already created ksp */ 1573 PetscInt nn_R; 1574 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1575 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1576 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1577 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1578 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1580 reuse = MAT_INITIAL_MATRIX; 1581 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1582 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1583 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1584 reuse = MAT_INITIAL_MATRIX; 1585 } else { /* safe to reuse the matrix */ 1586 reuse = MAT_REUSE_MATRIX; 1587 } 1588 } 1589 /* last check */ 1590 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1591 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1592 reuse = MAT_INITIAL_MATRIX; 1593 } 1594 } else { /* first time, so we need to create the matrix */ 1595 reuse = MAT_INITIAL_MATRIX; 1596 } 1597 /* extract A_RR */ 1598 if (!sub_schurs->reuse_mumps) { 1599 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1600 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1601 if (ibs != mbs) { 1602 Mat newmat; 1603 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1604 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1605 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1606 } else { 1607 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1608 } 1609 if (pcbddc->local_mat->symmetric_set) { 1610 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1611 } 1612 } else { 1613 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1614 1615 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1616 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1617 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1618 } 1619 if (!pcbddc->ksp_R) { /* create object if not present */ 1620 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1621 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1622 /* default */ 1623 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1624 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1625 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1626 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1627 if (issbaij) { 1628 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1629 } else { 1630 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1631 } 1632 /* Allow user's customization */ 1633 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1634 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1635 } 1636 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1637 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1638 if (!n_R) { 1639 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1640 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1641 } 1642 /* Reuse MUMPS solver if it is present */ 1643 if (sub_schurs->reuse_mumps) { 1644 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1645 1646 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1647 } 1648 /* Set Up KSP for Neumann problem of BDDC */ 1649 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1650 } 1651 /* free Neumann problem's matrix */ 1652 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1653 1654 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1655 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1656 if (pcbddc->dbg_flag) { 1657 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1658 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1660 } 1661 if (dirichlet) { /* Dirichlet */ 1662 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1663 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1664 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1665 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1666 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1667 /* need to be adapted? */ 1668 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1669 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1670 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1671 /* print info */ 1672 if (pcbddc->dbg_flag) { 1673 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1674 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1675 } 1676 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1677 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1678 } 1679 } 1680 if (neumann) { /* Neumann */ 1681 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1682 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1683 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1684 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1685 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1686 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1687 /* need to be adapted? */ 1688 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1689 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1690 /* print info */ 1691 if (pcbddc->dbg_flag) { 1692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1693 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1694 } 1695 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1696 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1697 } 1698 } 1699 } 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1705 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1706 { 1707 PetscErrorCode ierr; 1708 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1709 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1710 1711 PetscFunctionBegin; 1712 if (!sub_schurs->reuse_mumps) { 1713 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1714 } 1715 if (!pcbddc->switch_static) { 1716 if (applytranspose && pcbddc->local_auxmat1) { 1717 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1718 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1719 } 1720 if (!sub_schurs->reuse_mumps) { 1721 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1722 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1723 } else { 1724 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1725 1726 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1727 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1728 } 1729 } else { 1730 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1731 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1732 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1733 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1734 if (applytranspose && pcbddc->local_auxmat1) { 1735 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1736 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1737 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1738 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1739 } 1740 } 1741 if (!sub_schurs->reuse_mumps) { 1742 if (applytranspose) { 1743 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1744 } else { 1745 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1746 } 1747 #if defined(PETSC_HAVE_MUMPS) 1748 } else { 1749 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1750 1751 if (applytranspose) { 1752 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1753 } else { 1754 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1755 } 1756 #endif 1757 } 1758 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1759 if (!pcbddc->switch_static) { 1760 if (!sub_schurs->reuse_mumps) { 1761 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1762 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1763 } else { 1764 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1765 1766 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1767 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1768 } 1769 if (!applytranspose && pcbddc->local_auxmat1) { 1770 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1771 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1772 } 1773 } else { 1774 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1775 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1776 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1778 if (!applytranspose && pcbddc->local_auxmat1) { 1779 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1780 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1781 } 1782 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1783 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1784 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1785 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1786 } 1787 PetscFunctionReturn(0); 1788 } 1789 1790 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1791 #undef __FUNCT__ 1792 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1793 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1794 { 1795 PetscErrorCode ierr; 1796 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1797 PC_IS* pcis = (PC_IS*) (pc->data); 1798 const PetscScalar zero = 0.0; 1799 1800 PetscFunctionBegin; 1801 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1802 if (applytranspose) { 1803 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1804 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1805 } else { 1806 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1807 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1808 } 1809 /* start communications from local primal nodes to rhs of coarse solver */ 1810 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1811 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1812 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1813 1814 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1815 /* TODO remove null space when doing multilevel */ 1816 if (pcbddc->coarse_ksp) { 1817 Vec rhs,sol; 1818 1819 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1820 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1821 if (applytranspose) { 1822 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1823 } else { 1824 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1825 } 1826 } 1827 1828 /* Local solution on R nodes */ 1829 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1830 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1831 } 1832 1833 /* communications from coarse sol to local primal nodes */ 1834 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1835 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1836 1837 /* Sum contributions from two levels */ 1838 if (applytranspose) { 1839 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1840 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1841 } else { 1842 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1843 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1850 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1851 { 1852 PetscErrorCode ierr; 1853 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1854 PetscScalar *array; 1855 Vec from,to; 1856 1857 PetscFunctionBegin; 1858 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1859 from = pcbddc->coarse_vec; 1860 to = pcbddc->vec1_P; 1861 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1862 Vec tvec; 1863 1864 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1865 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1866 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1867 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1868 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1869 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1870 } 1871 } else { /* from local to global -> put data in coarse right hand side */ 1872 from = pcbddc->vec1_P; 1873 to = pcbddc->coarse_vec; 1874 } 1875 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1876 PetscFunctionReturn(0); 1877 } 1878 1879 #undef __FUNCT__ 1880 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1881 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1882 { 1883 PetscErrorCode ierr; 1884 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1885 PetscScalar *array; 1886 Vec from,to; 1887 1888 PetscFunctionBegin; 1889 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1890 from = pcbddc->coarse_vec; 1891 to = pcbddc->vec1_P; 1892 } else { /* from local to global -> put data in coarse right hand side */ 1893 from = pcbddc->vec1_P; 1894 to = pcbddc->coarse_vec; 1895 } 1896 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1897 if (smode == SCATTER_FORWARD) { 1898 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1899 Vec tvec; 1900 1901 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1902 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1903 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1904 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1905 } 1906 } else { 1907 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1908 ierr = VecResetArray(from);CHKERRQ(ierr); 1909 } 1910 } 1911 PetscFunctionReturn(0); 1912 } 1913 1914 /* uncomment for testing purposes */ 1915 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1918 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1919 { 1920 PetscErrorCode ierr; 1921 PC_IS* pcis = (PC_IS*)(pc->data); 1922 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1923 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1924 /* one and zero */ 1925 PetscScalar one=1.0,zero=0.0; 1926 /* space to store constraints and their local indices */ 1927 PetscScalar *constraints_data; 1928 PetscInt *constraints_idxs,*constraints_idxs_B; 1929 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1930 PetscInt *constraints_n; 1931 /* iterators */ 1932 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1933 /* BLAS integers */ 1934 PetscBLASInt lwork,lierr; 1935 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1936 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1937 /* reuse */ 1938 PetscInt olocal_primal_size,olocal_primal_size_cc; 1939 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1940 /* change of basis */ 1941 PetscBool qr_needed; 1942 PetscBT change_basis,qr_needed_idx; 1943 /* auxiliary stuff */ 1944 PetscInt *nnz,*is_indices; 1945 PetscInt ncc; 1946 /* some quantities */ 1947 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1948 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1949 1950 PetscFunctionBegin; 1951 /* Destroy Mat objects computed previously */ 1952 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1953 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1954 /* save info on constraints from previous setup (if any) */ 1955 olocal_primal_size = pcbddc->local_primal_size; 1956 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1957 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1958 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1959 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1960 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1961 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1962 1963 /* print some info */ 1964 if (pcbddc->dbg_flag) { 1965 IS vertices; 1966 PetscInt nv,nedges,nfaces; 1967 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1968 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1969 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1970 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1971 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1972 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1973 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1974 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1975 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1976 } 1977 1978 if (!pcbddc->adaptive_selection) { 1979 IS ISForVertices,*ISForFaces,*ISForEdges; 1980 MatNullSpace nearnullsp; 1981 const Vec *nearnullvecs; 1982 Vec *localnearnullsp; 1983 PetscScalar *array; 1984 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1985 PetscBool nnsp_has_cnst; 1986 /* LAPACK working arrays for SVD or POD */ 1987 PetscBool skip_lapack,boolforchange; 1988 PetscScalar *work; 1989 PetscReal *singular_vals; 1990 #if defined(PETSC_USE_COMPLEX) 1991 PetscReal *rwork; 1992 #endif 1993 #if defined(PETSC_MISSING_LAPACK_GESVD) 1994 PetscScalar *temp_basis,*correlation_mat; 1995 #else 1996 PetscBLASInt dummy_int=1; 1997 PetscScalar dummy_scalar=1.; 1998 #endif 1999 2000 /* Get index sets for faces, edges and vertices from graph */ 2001 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2002 /* free unneeded index sets */ 2003 if (!pcbddc->use_vertices) { 2004 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2005 } 2006 if (!pcbddc->use_edges) { 2007 for (i=0;i<n_ISForEdges;i++) { 2008 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2009 } 2010 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2011 n_ISForEdges = 0; 2012 } 2013 if (!pcbddc->use_faces) { 2014 for (i=0;i<n_ISForFaces;i++) { 2015 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2016 } 2017 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2018 n_ISForFaces = 0; 2019 } 2020 2021 #if defined(PETSC_USE_DEBUG) 2022 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2023 Also use_change_of_basis should be consistent among processors */ 2024 if (pcbddc->NullSpace) { 2025 PetscBool tbool[2],gbool[2]; 2026 2027 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2028 pcbddc->use_change_of_basis = PETSC_TRUE; 2029 if (!ISForEdges) { 2030 pcbddc->use_change_on_faces = PETSC_TRUE; 2031 } 2032 } 2033 tbool[0] = pcbddc->use_change_of_basis; 2034 tbool[1] = pcbddc->use_change_on_faces; 2035 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2036 pcbddc->use_change_of_basis = gbool[0]; 2037 pcbddc->use_change_on_faces = gbool[1]; 2038 } 2039 #endif 2040 2041 /* check if near null space is attached to global mat */ 2042 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2043 if (nearnullsp) { 2044 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2045 /* remove any stored info */ 2046 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2047 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2048 /* store information for BDDC solver reuse */ 2049 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2050 pcbddc->onearnullspace = nearnullsp; 2051 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2052 for (i=0;i<nnsp_size;i++) { 2053 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2054 } 2055 } else { /* if near null space is not provided BDDC uses constants by default */ 2056 nnsp_size = 0; 2057 nnsp_has_cnst = PETSC_TRUE; 2058 } 2059 /* get max number of constraints on a single cc */ 2060 max_constraints = nnsp_size; 2061 if (nnsp_has_cnst) max_constraints++; 2062 2063 /* 2064 Evaluate maximum storage size needed by the procedure 2065 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2066 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2067 There can be multiple constraints per connected component 2068 */ 2069 n_vertices = 0; 2070 if (ISForVertices) { 2071 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2072 } 2073 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2074 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2075 2076 total_counts = n_ISForFaces+n_ISForEdges; 2077 total_counts *= max_constraints; 2078 total_counts += n_vertices; 2079 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2080 2081 total_counts = 0; 2082 max_size_of_constraint = 0; 2083 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2084 IS used_is; 2085 if (i<n_ISForEdges) { 2086 used_is = ISForEdges[i]; 2087 } else { 2088 used_is = ISForFaces[i-n_ISForEdges]; 2089 } 2090 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2091 total_counts += j; 2092 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2093 } 2094 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2095 2096 /* get local part of global near null space vectors */ 2097 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2098 for (k=0;k<nnsp_size;k++) { 2099 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2100 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2102 } 2103 2104 /* whether or not to skip lapack calls */ 2105 skip_lapack = PETSC_TRUE; 2106 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2107 2108 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2109 if (!skip_lapack) { 2110 PetscScalar temp_work; 2111 2112 #if defined(PETSC_MISSING_LAPACK_GESVD) 2113 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2114 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2115 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2116 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2117 #if defined(PETSC_USE_COMPLEX) 2118 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2119 #endif 2120 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2121 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2122 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2123 lwork = -1; 2124 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2125 #if !defined(PETSC_USE_COMPLEX) 2126 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2127 #else 2128 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2129 #endif 2130 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2131 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2132 #else /* on missing GESVD */ 2133 /* SVD */ 2134 PetscInt max_n,min_n; 2135 max_n = max_size_of_constraint; 2136 min_n = max_constraints; 2137 if (max_size_of_constraint < max_constraints) { 2138 min_n = max_size_of_constraint; 2139 max_n = max_constraints; 2140 } 2141 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2142 #if defined(PETSC_USE_COMPLEX) 2143 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2144 #endif 2145 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2146 lwork = -1; 2147 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2148 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2149 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2150 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2151 #if !defined(PETSC_USE_COMPLEX) 2152 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2153 #else 2154 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2155 #endif 2156 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2157 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2158 #endif /* on missing GESVD */ 2159 /* Allocate optimal workspace */ 2160 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2161 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2162 } 2163 /* Now we can loop on constraining sets */ 2164 total_counts = 0; 2165 constraints_idxs_ptr[0] = 0; 2166 constraints_data_ptr[0] = 0; 2167 /* vertices */ 2168 if (n_vertices) { 2169 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2170 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2171 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2172 for (i=0;i<n_vertices;i++) { 2173 constraints_n[total_counts] = 1; 2174 constraints_data[total_counts] = 1.0; 2175 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2176 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2177 total_counts++; 2178 } 2179 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2180 PetscBool used_vertex; 2181 for (i=0;i<n_vertices;i++) { 2182 used_vertex = PETSC_FALSE; 2183 k = 0; 2184 while (!used_vertex && k<nnsp_size) { 2185 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2186 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2187 constraints_n[total_counts] = 1; 2188 constraints_idxs[total_counts] = is_indices[i]; 2189 constraints_data[total_counts] = 1.0; 2190 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2191 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2192 total_counts++; 2193 used_vertex = PETSC_TRUE; 2194 } 2195 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2196 k++; 2197 } 2198 } 2199 } 2200 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2201 n_vertices = total_counts; 2202 } 2203 2204 /* edges and faces */ 2205 total_counts_cc = total_counts; 2206 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2207 IS used_is; 2208 PetscBool idxs_copied = PETSC_FALSE; 2209 2210 if (ncc<n_ISForEdges) { 2211 used_is = ISForEdges[ncc]; 2212 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2213 } else { 2214 used_is = ISForFaces[ncc-n_ISForEdges]; 2215 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2216 } 2217 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2218 2219 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2220 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2221 /* change of basis should not be performed on local periodic nodes */ 2222 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2223 if (nnsp_has_cnst) { 2224 PetscScalar quad_value; 2225 2226 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2227 idxs_copied = PETSC_TRUE; 2228 2229 if (!pcbddc->use_nnsp_true) { 2230 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2231 } else { 2232 quad_value = 1.0; 2233 } 2234 for (j=0;j<size_of_constraint;j++) { 2235 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2236 } 2237 temp_constraints++; 2238 total_counts++; 2239 } 2240 for (k=0;k<nnsp_size;k++) { 2241 PetscReal real_value; 2242 PetscScalar *ptr_to_data; 2243 2244 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2245 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2246 for (j=0;j<size_of_constraint;j++) { 2247 ptr_to_data[j] = array[is_indices[j]]; 2248 } 2249 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2250 /* check if array is null on the connected component */ 2251 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2252 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2253 if (real_value > 0.0) { /* keep indices and values */ 2254 temp_constraints++; 2255 total_counts++; 2256 if (!idxs_copied) { 2257 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2258 idxs_copied = PETSC_TRUE; 2259 } 2260 } 2261 } 2262 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2263 valid_constraints = temp_constraints; 2264 if (!pcbddc->use_nnsp_true && temp_constraints) { 2265 if (temp_constraints == 1) { /* just normalize the constraint */ 2266 PetscScalar norm,*ptr_to_data; 2267 2268 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2269 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2270 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2271 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2272 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2273 } else { /* perform SVD */ 2274 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2275 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2276 2277 #if defined(PETSC_MISSING_LAPACK_GESVD) 2278 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2279 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2280 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2281 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2282 from that computed using LAPACKgesvd 2283 -> This is due to a different computation of eigenvectors in LAPACKheev 2284 -> The quality of the POD-computed basis will be the same */ 2285 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2286 /* Store upper triangular part of correlation matrix */ 2287 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2288 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2289 for (j=0;j<temp_constraints;j++) { 2290 for (k=0;k<j+1;k++) { 2291 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2292 } 2293 } 2294 /* compute eigenvalues and eigenvectors of correlation matrix */ 2295 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2296 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2297 #if !defined(PETSC_USE_COMPLEX) 2298 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2299 #else 2300 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2301 #endif 2302 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2303 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2304 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2305 j = 0; 2306 while (j < temp_constraints && singular_vals[j] < tol) j++; 2307 total_counts = total_counts-j; 2308 valid_constraints = temp_constraints-j; 2309 /* scale and copy POD basis into used quadrature memory */ 2310 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2311 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2312 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2313 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2314 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2315 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2316 if (j<temp_constraints) { 2317 PetscInt ii; 2318 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2319 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2320 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2321 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2322 for (k=0;k<temp_constraints-j;k++) { 2323 for (ii=0;ii<size_of_constraint;ii++) { 2324 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2325 } 2326 } 2327 } 2328 #else /* on missing GESVD */ 2329 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2330 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2331 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2332 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2333 #if !defined(PETSC_USE_COMPLEX) 2334 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2335 #else 2336 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2337 #endif 2338 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2339 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2340 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2341 k = temp_constraints; 2342 if (k > size_of_constraint) k = size_of_constraint; 2343 j = 0; 2344 while (j < k && singular_vals[k-j-1] < tol) j++; 2345 valid_constraints = k-j; 2346 total_counts = total_counts-temp_constraints+valid_constraints; 2347 #endif /* on missing GESVD */ 2348 } 2349 } 2350 /* update pointers information */ 2351 if (valid_constraints) { 2352 constraints_n[total_counts_cc] = valid_constraints; 2353 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2354 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2355 /* set change_of_basis flag */ 2356 if (boolforchange) { 2357 PetscBTSet(change_basis,total_counts_cc); 2358 } 2359 total_counts_cc++; 2360 } 2361 } 2362 /* free workspace */ 2363 if (!skip_lapack) { 2364 ierr = PetscFree(work);CHKERRQ(ierr); 2365 #if defined(PETSC_USE_COMPLEX) 2366 ierr = PetscFree(rwork);CHKERRQ(ierr); 2367 #endif 2368 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2369 #if defined(PETSC_MISSING_LAPACK_GESVD) 2370 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2371 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2372 #endif 2373 } 2374 for (k=0;k<nnsp_size;k++) { 2375 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2376 } 2377 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2378 /* free index sets of faces, edges and vertices */ 2379 for (i=0;i<n_ISForFaces;i++) { 2380 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2381 } 2382 if (n_ISForFaces) { 2383 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2384 } 2385 for (i=0;i<n_ISForEdges;i++) { 2386 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2387 } 2388 if (n_ISForEdges) { 2389 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2390 } 2391 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2392 } else { 2393 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2394 2395 total_counts = 0; 2396 n_vertices = 0; 2397 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2398 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2399 } 2400 max_constraints = 0; 2401 total_counts_cc = 0; 2402 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2403 total_counts += pcbddc->adaptive_constraints_n[i]; 2404 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2405 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2406 } 2407 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2408 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2409 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2410 constraints_data = pcbddc->adaptive_constraints_data; 2411 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2412 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2413 total_counts_cc = 0; 2414 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2415 if (pcbddc->adaptive_constraints_n[i]) { 2416 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2417 } 2418 } 2419 #if 0 2420 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2421 for (i=0;i<total_counts_cc;i++) { 2422 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2423 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2424 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2425 printf(" %d",constraints_idxs[j]); 2426 } 2427 printf("\n"); 2428 printf("number of cc: %d\n",constraints_n[i]); 2429 } 2430 for (i=0;i<n_vertices;i++) { 2431 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2432 } 2433 for (i=0;i<sub_schurs->n_subs;i++) { 2434 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2435 } 2436 #endif 2437 2438 max_size_of_constraint = 0; 2439 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2440 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2441 /* Change of basis */ 2442 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2443 if (pcbddc->use_change_of_basis) { 2444 for (i=0;i<sub_schurs->n_subs;i++) { 2445 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2446 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2447 } 2448 } 2449 } 2450 } 2451 pcbddc->local_primal_size = total_counts; 2452 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2453 2454 /* map constraints_idxs in boundary numbering */ 2455 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2456 if (i != constraints_idxs_ptr[total_counts_cc]) { 2457 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2458 } 2459 2460 /* Create constraint matrix */ 2461 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2462 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2463 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2464 2465 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2466 /* determine if a QR strategy is needed for change of basis */ 2467 qr_needed = PETSC_FALSE; 2468 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2469 total_primal_vertices=0; 2470 pcbddc->local_primal_size_cc = 0; 2471 for (i=0;i<total_counts_cc;i++) { 2472 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2473 if (size_of_constraint == 1) { 2474 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2475 pcbddc->local_primal_size_cc += 1; 2476 } else if (PetscBTLookup(change_basis,i)) { 2477 for (k=0;k<constraints_n[i];k++) { 2478 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2479 } 2480 pcbddc->local_primal_size_cc += constraints_n[i]; 2481 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2482 PetscBTSet(qr_needed_idx,i); 2483 qr_needed = PETSC_TRUE; 2484 } 2485 } else { 2486 pcbddc->local_primal_size_cc += 1; 2487 } 2488 } 2489 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2490 pcbddc->n_vertices = total_primal_vertices; 2491 /* permute indices in order to have a sorted set of vertices */ 2492 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2493 2494 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2495 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2496 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2497 2498 /* nonzero structure of constraint matrix */ 2499 /* and get reference dof for local constraints */ 2500 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2501 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2502 2503 j = total_primal_vertices; 2504 total_counts = total_primal_vertices; 2505 cum = total_primal_vertices; 2506 for (i=n_vertices;i<total_counts_cc;i++) { 2507 if (!PetscBTLookup(change_basis,i)) { 2508 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2509 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2510 cum++; 2511 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2512 for (k=0;k<constraints_n[i];k++) { 2513 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2514 nnz[j+k] = size_of_constraint; 2515 } 2516 j += constraints_n[i]; 2517 } 2518 } 2519 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2520 ierr = PetscFree(nnz);CHKERRQ(ierr); 2521 2522 /* set values in constraint matrix */ 2523 for (i=0;i<total_primal_vertices;i++) { 2524 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2525 } 2526 total_counts = total_primal_vertices; 2527 for (i=n_vertices;i<total_counts_cc;i++) { 2528 if (!PetscBTLookup(change_basis,i)) { 2529 PetscInt *cols; 2530 2531 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2532 cols = constraints_idxs+constraints_idxs_ptr[i]; 2533 for (k=0;k<constraints_n[i];k++) { 2534 PetscInt row = total_counts+k; 2535 PetscScalar *vals; 2536 2537 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2538 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2539 } 2540 total_counts += constraints_n[i]; 2541 } 2542 } 2543 /* assembling */ 2544 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2545 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2546 2547 /* 2548 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2549 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2550 */ 2551 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2552 if (pcbddc->use_change_of_basis) { 2553 /* dual and primal dofs on a single cc */ 2554 PetscInt dual_dofs,primal_dofs; 2555 /* working stuff for GEQRF */ 2556 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2557 PetscBLASInt lqr_work; 2558 /* working stuff for UNGQR */ 2559 PetscScalar *gqr_work,lgqr_work_t; 2560 PetscBLASInt lgqr_work; 2561 /* working stuff for TRTRS */ 2562 PetscScalar *trs_rhs; 2563 PetscBLASInt Blas_NRHS; 2564 /* pointers for values insertion into change of basis matrix */ 2565 PetscInt *start_rows,*start_cols; 2566 PetscScalar *start_vals; 2567 /* working stuff for values insertion */ 2568 PetscBT is_primal; 2569 PetscInt *aux_primal_numbering_B; 2570 /* matrix sizes */ 2571 PetscInt global_size,local_size; 2572 /* temporary change of basis */ 2573 Mat localChangeOfBasisMatrix; 2574 /* extra space for debugging */ 2575 PetscScalar *dbg_work; 2576 2577 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2578 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2579 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2580 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2581 /* nonzeros for local mat */ 2582 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2583 for (i=0;i<pcis->n;i++) nnz[i]=1; 2584 for (i=n_vertices;i<total_counts_cc;i++) { 2585 if (PetscBTLookup(change_basis,i)) { 2586 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2587 if (PetscBTLookup(qr_needed_idx,i)) { 2588 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2589 } else { 2590 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2591 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2592 } 2593 } 2594 } 2595 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2596 ierr = PetscFree(nnz);CHKERRQ(ierr); 2597 /* Set initial identity in the matrix */ 2598 for (i=0;i<pcis->n;i++) { 2599 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2600 } 2601 2602 if (pcbddc->dbg_flag) { 2603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2604 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2605 } 2606 2607 2608 /* Now we loop on the constraints which need a change of basis */ 2609 /* 2610 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2611 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2612 2613 Basic blocks of change of basis matrix T computed by 2614 2615 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2616 2617 | 1 0 ... 0 s_1/S | 2618 | 0 1 ... 0 s_2/S | 2619 | ... | 2620 | 0 ... 1 s_{n-1}/S | 2621 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2622 2623 with S = \sum_{i=1}^n s_i^2 2624 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2625 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2626 2627 - QR decomposition of constraints otherwise 2628 */ 2629 if (qr_needed) { 2630 /* space to store Q */ 2631 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2632 /* first we issue queries for optimal work */ 2633 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2634 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2635 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2636 lqr_work = -1; 2637 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2638 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2639 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2640 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2641 lgqr_work = -1; 2642 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2643 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2644 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2645 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2646 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2647 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2648 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2649 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2650 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2651 /* array to store scaling factors for reflectors */ 2652 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2653 /* array to store rhs and solution of triangular solver */ 2654 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2655 /* allocating workspace for check */ 2656 if (pcbddc->dbg_flag) { 2657 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2658 } 2659 } 2660 /* array to store whether a node is primal or not */ 2661 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2662 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2663 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2664 if (i != total_primal_vertices) { 2665 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2666 } 2667 for (i=0;i<total_primal_vertices;i++) { 2668 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2669 } 2670 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2671 2672 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2673 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2674 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2675 if (PetscBTLookup(change_basis,total_counts)) { 2676 /* get constraint info */ 2677 primal_dofs = constraints_n[total_counts]; 2678 dual_dofs = size_of_constraint-primal_dofs; 2679 2680 if (pcbddc->dbg_flag) { 2681 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2682 } 2683 2684 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2685 2686 /* copy quadrature constraints for change of basis check */ 2687 if (pcbddc->dbg_flag) { 2688 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2689 } 2690 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2691 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2692 2693 /* compute QR decomposition of constraints */ 2694 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2695 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2696 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2697 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2698 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2699 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2700 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2701 2702 /* explictly compute R^-T */ 2703 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2704 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2705 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2706 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2707 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2708 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2709 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2710 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2711 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2712 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2713 2714 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2715 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2716 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2717 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2718 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2719 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2720 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2721 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2722 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2723 2724 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2725 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2726 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2727 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2728 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2729 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2731 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2732 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2733 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2734 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2735 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2736 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2737 2738 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2739 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2740 /* insert cols for primal dofs */ 2741 for (j=0;j<primal_dofs;j++) { 2742 start_vals = &qr_basis[j*size_of_constraint]; 2743 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2744 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2745 } 2746 /* insert cols for dual dofs */ 2747 for (j=0,k=0;j<dual_dofs;k++) { 2748 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2749 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2750 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2751 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2752 j++; 2753 } 2754 } 2755 2756 /* check change of basis */ 2757 if (pcbddc->dbg_flag) { 2758 PetscInt ii,jj; 2759 PetscBool valid_qr=PETSC_TRUE; 2760 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2761 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2762 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2763 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2764 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2765 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2766 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2767 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2768 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2769 for (jj=0;jj<size_of_constraint;jj++) { 2770 for (ii=0;ii<primal_dofs;ii++) { 2771 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2772 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2773 } 2774 } 2775 if (!valid_qr) { 2776 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2777 for (jj=0;jj<size_of_constraint;jj++) { 2778 for (ii=0;ii<primal_dofs;ii++) { 2779 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2780 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2781 } 2782 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2783 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2784 } 2785 } 2786 } 2787 } else { 2788 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2789 } 2790 } 2791 } else { /* simple transformation block */ 2792 PetscInt row,col; 2793 PetscScalar val,norm; 2794 2795 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2796 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2797 for (j=0;j<size_of_constraint;j++) { 2798 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2799 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2800 if (!PetscBTLookup(is_primal,row_B)) { 2801 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2802 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2803 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2804 } else { 2805 for (k=0;k<size_of_constraint;k++) { 2806 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2807 if (row != col) { 2808 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2809 } else { 2810 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2811 } 2812 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2813 } 2814 } 2815 } 2816 if (pcbddc->dbg_flag) { 2817 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2818 } 2819 } 2820 } else { 2821 if (pcbddc->dbg_flag) { 2822 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2823 } 2824 } 2825 } 2826 2827 /* free workspace */ 2828 if (qr_needed) { 2829 if (pcbddc->dbg_flag) { 2830 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2831 } 2832 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2833 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2834 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2835 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2836 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2837 } 2838 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2839 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2840 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2841 2842 /* assembling of global change of variable */ 2843 { 2844 Mat tmat; 2845 PetscInt bs; 2846 2847 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2848 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2849 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2850 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2851 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2852 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2853 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2854 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2855 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2856 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2857 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2858 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2859 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2860 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2861 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2862 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2863 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2864 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2865 } 2866 /* check */ 2867 if (pcbddc->dbg_flag) { 2868 PetscReal error; 2869 Vec x,x_change; 2870 2871 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2872 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2873 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2874 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2875 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2876 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2877 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2878 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2879 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2880 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2881 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2882 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2883 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2884 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2885 ierr = VecDestroy(&x);CHKERRQ(ierr); 2886 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2887 } 2888 2889 /* adapt sub_schurs computed (if any) */ 2890 if (pcbddc->use_deluxe_scaling) { 2891 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2892 if (sub_schurs->S_Ej_all) { 2893 Mat S_new,tmat; 2894 IS is_all_N; 2895 2896 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2897 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2898 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2899 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2900 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2901 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2902 sub_schurs->S_Ej_all = S_new; 2903 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2904 if (sub_schurs->sum_S_Ej_all) { 2905 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2906 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2907 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2908 sub_schurs->sum_S_Ej_all = S_new; 2909 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2910 } 2911 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2912 } 2913 } 2914 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2915 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2916 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2917 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2918 } 2919 2920 /* set up change of basis context */ 2921 if (pcbddc->ChangeOfBasisMatrix) { 2922 PCBDDCChange_ctx change_ctx; 2923 2924 if (!pcbddc->new_global_mat) { 2925 PetscInt global_size,local_size; 2926 2927 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2928 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2929 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2930 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2931 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2932 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2933 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2934 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2935 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2936 } else { 2937 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2938 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2939 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2940 } 2941 if (!pcbddc->user_ChangeOfBasisMatrix) { 2942 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2943 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2944 } else { 2945 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2946 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2947 } 2948 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2949 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2950 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2951 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2952 } 2953 2954 /* check if a new primal space has been introduced */ 2955 pcbddc->new_primal_space_local = PETSC_TRUE; 2956 if (olocal_primal_size == pcbddc->local_primal_size) { 2957 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2958 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2959 if (!pcbddc->new_primal_space_local) { 2960 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2961 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2962 } 2963 } 2964 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2965 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2966 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2967 2968 /* flush dbg viewer */ 2969 if (pcbddc->dbg_flag) { 2970 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2971 } 2972 2973 /* free workspace */ 2974 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2975 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2976 if (!pcbddc->adaptive_selection) { 2977 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2978 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2979 } else { 2980 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2981 pcbddc->adaptive_constraints_idxs_ptr, 2982 pcbddc->adaptive_constraints_data_ptr, 2983 pcbddc->adaptive_constraints_idxs, 2984 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2985 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2986 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2987 } 2988 PetscFunctionReturn(0); 2989 } 2990 2991 #undef __FUNCT__ 2992 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2993 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2994 { 2995 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2996 PC_IS *pcis = (PC_IS*)pc->data; 2997 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2998 PetscInt ierr,i,vertex_size,N; 2999 PetscViewer viewer=pcbddc->dbg_viewer; 3000 3001 PetscFunctionBegin; 3002 /* Reset previously computed graph */ 3003 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3004 /* Init local Graph struct */ 3005 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3006 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 3007 3008 /* Check validity of the csr graph passed in by the user */ 3009 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3010 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3011 } 3012 3013 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3014 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3015 PetscInt *xadj,*adjncy; 3016 PetscInt nvtxs; 3017 PetscBool flg_row=PETSC_FALSE; 3018 3019 if (pcbddc->use_local_adj) { 3020 3021 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3022 if (flg_row) { 3023 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3024 pcbddc->computed_rowadj = PETSC_TRUE; 3025 } 3026 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3027 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3028 IS is_dummy; 3029 ISLocalToGlobalMapping l2gmap_dummy; 3030 PetscInt j,sum; 3031 PetscInt *cxadj,*cadjncy; 3032 const PetscInt *idxs; 3033 PCBDDCGraph graph; 3034 PetscBT is_on_boundary; 3035 3036 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3037 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3038 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3039 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3040 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3041 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3042 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3043 if (flg_row) { 3044 graph->xadj = xadj; 3045 graph->adjncy = adjncy; 3046 } 3047 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3048 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3049 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3050 3051 if (pcbddc->dbg_flag) { 3052 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3053 for (i=0;i<graph->ncc;i++) { 3054 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3055 } 3056 } 3057 3058 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3059 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3060 for (i=0;i<pcis->n_B;i++) { 3061 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3062 } 3063 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3064 3065 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3066 sum = 0; 3067 for (i=0;i<graph->ncc;i++) { 3068 PetscInt sizecc = 0; 3069 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3070 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3071 sizecc++; 3072 } 3073 } 3074 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3075 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3076 cxadj[graph->queue[j]] = sizecc; 3077 } 3078 } 3079 sum += sizecc*sizecc; 3080 } 3081 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3082 sum = 0; 3083 for (i=0;i<pcis->n;i++) { 3084 PetscInt temp = cxadj[i]; 3085 cxadj[i] = sum; 3086 sum += temp; 3087 } 3088 cxadj[pcis->n] = sum; 3089 for (i=0;i<graph->ncc;i++) { 3090 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3091 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3092 PetscInt k,sizecc = 0; 3093 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3094 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3095 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3096 sizecc++; 3097 } 3098 } 3099 } 3100 } 3101 } 3102 if (sum) { 3103 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3104 } else { 3105 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3106 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3107 } 3108 graph->xadj = 0; 3109 graph->adjncy = 0; 3110 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3111 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3112 } 3113 } 3114 if (pcbddc->dbg_flag) { 3115 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3116 } 3117 3118 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3119 vertex_size = 1; 3120 if (pcbddc->user_provided_isfordofs) { 3121 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3122 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3123 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3124 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3125 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3126 } 3127 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3128 pcbddc->n_ISForDofs = 0; 3129 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3130 } 3131 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3132 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3133 } else { 3134 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3135 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3136 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3137 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3138 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3139 } 3140 } 3141 } 3142 3143 /* Setup of Graph */ 3144 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3145 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3146 } 3147 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3148 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3149 } 3150 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3151 3152 /* Graph's connected components analysis */ 3153 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3154 3155 /* print some info to stdout */ 3156 if (pcbddc->dbg_flag) { 3157 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3158 } 3159 3160 /* mark topography has done */ 3161 pcbddc->recompute_topography = PETSC_FALSE; 3162 PetscFunctionReturn(0); 3163 } 3164 3165 #undef __FUNCT__ 3166 #define __FUNCT__ "PCBDDCSubsetNumbering" 3167 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3168 { 3169 Vec local_vec,global_vec; 3170 IS seqis,paris; 3171 VecScatter scatter_ctx; 3172 PetscScalar *array; 3173 PetscInt *temp_global_dofs; 3174 PetscScalar globalsum; 3175 PetscInt i,j,s; 3176 PetscInt nlocals,first_index,old_index,max_local,max_global; 3177 PetscMPIInt rank_prec_comm,size_prec_comm; 3178 PetscInt *dof_sizes,*dof_displs; 3179 PetscBool first_found; 3180 PetscErrorCode ierr; 3181 3182 PetscFunctionBegin; 3183 /* mpi buffers */ 3184 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3185 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3186 j = ( !rank_prec_comm ? size_prec_comm : 0); 3187 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3188 /* get maximum size of subset */ 3189 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3190 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3191 max_local = 0; 3192 for (i=0;i<n_local_dofs;i++) { 3193 if (max_local < temp_global_dofs[i] ) { 3194 max_local = temp_global_dofs[i]; 3195 } 3196 } 3197 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3198 max_global++; 3199 max_local = 0; 3200 for (i=0;i<n_local_dofs;i++) { 3201 if (max_local < local_dofs[i] ) { 3202 max_local = local_dofs[i]; 3203 } 3204 } 3205 max_local++; 3206 /* allocate workspace */ 3207 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3208 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3209 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3210 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3211 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3212 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3213 /* create scatter */ 3214 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3215 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3216 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3217 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3218 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3219 /* init array */ 3220 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3221 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3222 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3223 if (local_dofs_mult) { 3224 for (i=0;i<n_local_dofs;i++) { 3225 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3226 } 3227 } else { 3228 for (i=0;i<n_local_dofs;i++) { 3229 array[local_dofs[i]]=1.0; 3230 } 3231 } 3232 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3233 /* scatter into global vec and get total number of global dofs */ 3234 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3235 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3236 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3237 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3238 /* Fill global_vec with cumulative function for global numbering */ 3239 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3240 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3241 nlocals = 0; 3242 first_index = -1; 3243 first_found = PETSC_FALSE; 3244 for (i=0;i<s;i++) { 3245 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3246 first_found = PETSC_TRUE; 3247 first_index = i; 3248 } 3249 nlocals += (PetscInt)PetscRealPart(array[i]); 3250 } 3251 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3252 if (!rank_prec_comm) { 3253 dof_displs[0]=0; 3254 for (i=1;i<size_prec_comm;i++) { 3255 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3256 } 3257 } 3258 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3259 if (first_found) { 3260 array[first_index] += (PetscScalar)nlocals; 3261 old_index = first_index; 3262 for (i=first_index+1;i<s;i++) { 3263 if (PetscRealPart(array[i]) > 0.1) { 3264 array[i] += array[old_index]; 3265 old_index = i; 3266 } 3267 } 3268 } 3269 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3270 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3271 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3272 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3273 /* get global ordering of local dofs */ 3274 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3275 if (local_dofs_mult) { 3276 for (i=0;i<n_local_dofs;i++) { 3277 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3278 } 3279 } else { 3280 for (i=0;i<n_local_dofs;i++) { 3281 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3282 } 3283 } 3284 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3285 /* free workspace */ 3286 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3287 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3288 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3289 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3290 /* return pointer to global ordering of local dofs */ 3291 *global_numbering_subset = temp_global_dofs; 3292 PetscFunctionReturn(0); 3293 } 3294 3295 #undef __FUNCT__ 3296 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3297 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3298 { 3299 PetscInt i,j; 3300 PetscScalar *alphas; 3301 PetscErrorCode ierr; 3302 3303 PetscFunctionBegin; 3304 /* this implements stabilized Gram-Schmidt */ 3305 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3306 for (i=0;i<n;i++) { 3307 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3308 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3309 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3310 } 3311 ierr = PetscFree(alphas);CHKERRQ(ierr); 3312 PetscFunctionReturn(0); 3313 } 3314 3315 #undef __FUNCT__ 3316 #define __FUNCT__ "MatISGetSubassemblingPattern" 3317 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3318 { 3319 Mat subdomain_adj; 3320 IS new_ranks,ranks_send_to; 3321 MatPartitioning partitioner; 3322 Mat_IS *matis; 3323 PetscInt n_neighs,*neighs,*n_shared,**shared; 3324 PetscInt prank; 3325 PetscMPIInt size,rank,color; 3326 PetscInt *xadj,*adjncy,*oldranks; 3327 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3328 PetscInt i,local_size,threshold=0; 3329 PetscErrorCode ierr; 3330 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3331 PetscSubcomm subcomm; 3332 3333 PetscFunctionBegin; 3334 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3335 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3336 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3337 3338 /* Get info on mapping */ 3339 matis = (Mat_IS*)(mat->data); 3340 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3341 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3342 3343 /* build local CSR graph of subdomains' connectivity */ 3344 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3345 xadj[0] = 0; 3346 xadj[1] = PetscMax(n_neighs-1,0); 3347 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3348 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3349 3350 if (threshold) { 3351 PetscInt xadj_count = 0; 3352 for (i=1;i<n_neighs;i++) { 3353 if (n_shared[i] > threshold) { 3354 adjncy[xadj_count] = neighs[i]; 3355 adjncy_wgt[xadj_count] = n_shared[i]; 3356 xadj_count++; 3357 } 3358 } 3359 xadj[1] = xadj_count; 3360 } else { 3361 if (xadj[1]) { 3362 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3363 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3364 } 3365 } 3366 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3367 if (use_square) { 3368 for (i=0;i<xadj[1];i++) { 3369 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3370 } 3371 } 3372 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3373 3374 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3375 3376 /* 3377 Restrict work on active processes only. 3378 */ 3379 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3380 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3381 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3382 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3383 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3384 if (color) { 3385 ierr = PetscFree(xadj);CHKERRQ(ierr); 3386 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3387 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3388 } else { 3389 PetscInt coarsening_ratio; 3390 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3391 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3392 prank = rank; 3393 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3394 /* 3395 for (i=0;i<size;i++) { 3396 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3397 } 3398 */ 3399 for (i=0;i<xadj[1];i++) { 3400 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3401 } 3402 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3403 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3404 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3405 3406 /* Partition */ 3407 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3408 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3409 if (use_vwgt) { 3410 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3411 v_wgt[0] = local_size; 3412 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3413 } 3414 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3415 coarsening_ratio = size/n_subdomains; 3416 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3417 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3418 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3419 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3420 3421 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3422 if (contiguous) { 3423 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3424 } else { 3425 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3426 } 3427 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3428 /* clean up */ 3429 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3430 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3431 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3432 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3433 } 3434 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3435 3436 /* assemble parallel IS for sends */ 3437 i = 1; 3438 if (color) i=0; 3439 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3440 3441 /* get back IS */ 3442 *is_sends = ranks_send_to; 3443 PetscFunctionReturn(0); 3444 } 3445 3446 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3447 3448 #undef __FUNCT__ 3449 #define __FUNCT__ "MatISSubassemble" 3450 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3451 { 3452 Mat local_mat; 3453 Mat_IS *matis; 3454 IS is_sends_internal; 3455 PetscInt rows,cols,new_local_rows; 3456 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3457 PetscBool ismatis,isdense,newisdense,destroy_mat; 3458 ISLocalToGlobalMapping l2gmap; 3459 PetscInt* l2gmap_indices; 3460 const PetscInt* is_indices; 3461 MatType new_local_type; 3462 /* buffers */ 3463 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3464 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3465 PetscInt *recv_buffer_idxs_local; 3466 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3467 /* MPI */ 3468 MPI_Comm comm,comm_n; 3469 PetscSubcomm subcomm; 3470 PetscMPIInt n_sends,n_recvs,commsize; 3471 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3472 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3473 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3474 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3475 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3476 PetscErrorCode ierr; 3477 3478 PetscFunctionBegin; 3479 /* TODO: add missing checks */ 3480 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3481 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3482 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3483 PetscValidLogicalCollectiveInt(mat,nis,7); 3484 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3485 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3486 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3487 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3488 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3489 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3490 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3491 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3492 PetscInt mrows,mcols,mnrows,mncols; 3493 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3494 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3495 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3496 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3497 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3498 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3499 } 3500 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3501 PetscValidLogicalCollectiveInt(mat,bs,0); 3502 /* prepare IS for sending if not provided */ 3503 if (!is_sends) { 3504 PetscBool pcontig = PETSC_TRUE; 3505 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3506 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3507 } else { 3508 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3509 is_sends_internal = is_sends; 3510 } 3511 3512 /* get pointer of MATIS data */ 3513 matis = (Mat_IS*)mat->data; 3514 3515 /* get comm */ 3516 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3517 3518 /* compute number of sends */ 3519 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3520 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3521 3522 /* compute number of receives */ 3523 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3524 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3525 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3526 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3527 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3528 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3529 ierr = PetscFree(iflags);CHKERRQ(ierr); 3530 3531 /* restrict comm if requested */ 3532 subcomm = 0; 3533 destroy_mat = PETSC_FALSE; 3534 if (restrict_comm) { 3535 PetscMPIInt color,subcommsize; 3536 3537 color = 0; 3538 if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */ 3539 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3540 subcommsize = commsize - subcommsize; 3541 /* check if reuse has been requested */ 3542 if (reuse == MAT_REUSE_MATRIX) { 3543 if (*mat_n) { 3544 PetscMPIInt subcommsize2; 3545 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3546 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3547 comm_n = PetscObjectComm((PetscObject)*mat_n); 3548 } else { 3549 comm_n = PETSC_COMM_SELF; 3550 } 3551 } else { /* MAT_INITIAL_MATRIX */ 3552 PetscMPIInt rank; 3553 3554 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3555 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3556 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3557 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3558 comm_n = PetscSubcommChild(subcomm); 3559 } 3560 /* flag to destroy *mat_n if not significative */ 3561 if (color) destroy_mat = PETSC_TRUE; 3562 } else { 3563 comm_n = comm; 3564 } 3565 3566 /* prepare send/receive buffers */ 3567 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3568 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3569 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3570 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3571 if (nis) { 3572 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3573 } 3574 3575 /* Get data from local matrices */ 3576 if (!isdense) { 3577 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3578 /* TODO: See below some guidelines on how to prepare the local buffers */ 3579 /* 3580 send_buffer_vals should contain the raw values of the local matrix 3581 send_buffer_idxs should contain: 3582 - MatType_PRIVATE type 3583 - PetscInt size_of_l2gmap 3584 - PetscInt global_row_indices[size_of_l2gmap] 3585 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3586 */ 3587 } else { 3588 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3589 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3590 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3591 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3592 send_buffer_idxs[1] = i; 3593 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3594 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3595 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3596 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3597 for (i=0;i<n_sends;i++) { 3598 ilengths_vals[is_indices[i]] = len*len; 3599 ilengths_idxs[is_indices[i]] = len+2; 3600 } 3601 } 3602 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3603 /* additional is (if any) */ 3604 if (nis) { 3605 PetscMPIInt psum; 3606 PetscInt j; 3607 for (j=0,psum=0;j<nis;j++) { 3608 PetscInt plen; 3609 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3610 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3611 psum += len+1; /* indices + lenght */ 3612 } 3613 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3614 for (j=0,psum=0;j<nis;j++) { 3615 PetscInt plen; 3616 const PetscInt *is_array_idxs; 3617 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3618 send_buffer_idxs_is[psum] = plen; 3619 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3620 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3621 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3622 psum += plen+1; /* indices + lenght */ 3623 } 3624 for (i=0;i<n_sends;i++) { 3625 ilengths_idxs_is[is_indices[i]] = psum; 3626 } 3627 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3628 } 3629 3630 buf_size_idxs = 0; 3631 buf_size_vals = 0; 3632 buf_size_idxs_is = 0; 3633 for (i=0;i<n_recvs;i++) { 3634 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3635 buf_size_vals += (PetscInt)olengths_vals[i]; 3636 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3637 } 3638 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3639 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3640 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3641 3642 /* get new tags for clean communications */ 3643 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3644 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3645 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3646 3647 /* allocate for requests */ 3648 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3649 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3650 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3651 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3652 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3653 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3654 3655 /* communications */ 3656 ptr_idxs = recv_buffer_idxs; 3657 ptr_vals = recv_buffer_vals; 3658 ptr_idxs_is = recv_buffer_idxs_is; 3659 for (i=0;i<n_recvs;i++) { 3660 source_dest = onodes[i]; 3661 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3662 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3663 ptr_idxs += olengths_idxs[i]; 3664 ptr_vals += olengths_vals[i]; 3665 if (nis) { 3666 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3667 ptr_idxs_is += olengths_idxs_is[i]; 3668 } 3669 } 3670 for (i=0;i<n_sends;i++) { 3671 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3672 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3673 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3674 if (nis) { 3675 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3676 } 3677 } 3678 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3679 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3680 3681 /* assemble new l2g map */ 3682 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3683 ptr_idxs = recv_buffer_idxs; 3684 new_local_rows = 0; 3685 for (i=0;i<n_recvs;i++) { 3686 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3687 ptr_idxs += olengths_idxs[i]; 3688 } 3689 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3690 ptr_idxs = recv_buffer_idxs; 3691 new_local_rows = 0; 3692 for (i=0;i<n_recvs;i++) { 3693 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3694 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3695 ptr_idxs += olengths_idxs[i]; 3696 } 3697 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3698 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3699 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3700 3701 /* infer new local matrix type from received local matrices type */ 3702 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3703 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3704 if (n_recvs) { 3705 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3706 ptr_idxs = recv_buffer_idxs; 3707 for (i=0;i<n_recvs;i++) { 3708 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3709 new_local_type_private = MATAIJ_PRIVATE; 3710 break; 3711 } 3712 ptr_idxs += olengths_idxs[i]; 3713 } 3714 switch (new_local_type_private) { 3715 case MATDENSE_PRIVATE: 3716 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3717 new_local_type = MATSEQAIJ; 3718 bs = 1; 3719 } else { /* if I receive only 1 dense matrix */ 3720 new_local_type = MATSEQDENSE; 3721 bs = 1; 3722 } 3723 break; 3724 case MATAIJ_PRIVATE: 3725 new_local_type = MATSEQAIJ; 3726 bs = 1; 3727 break; 3728 case MATBAIJ_PRIVATE: 3729 new_local_type = MATSEQBAIJ; 3730 break; 3731 case MATSBAIJ_PRIVATE: 3732 new_local_type = MATSEQSBAIJ; 3733 break; 3734 default: 3735 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3736 break; 3737 } 3738 } else { /* by default, new_local_type is seqdense */ 3739 new_local_type = MATSEQDENSE; 3740 bs = 1; 3741 } 3742 3743 /* create MATIS object if needed */ 3744 if (reuse == MAT_INITIAL_MATRIX) { 3745 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3746 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3747 } else { 3748 /* it also destroys the local matrices */ 3749 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3750 } 3751 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3752 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3753 3754 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3755 3756 /* Global to local map of received indices */ 3757 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3758 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3759 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3760 3761 /* restore attributes -> type of incoming data and its size */ 3762 buf_size_idxs = 0; 3763 for (i=0;i<n_recvs;i++) { 3764 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3765 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3766 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3767 } 3768 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3769 3770 /* set preallocation */ 3771 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3772 if (!newisdense) { 3773 PetscInt *new_local_nnz=0; 3774 3775 ptr_vals = recv_buffer_vals; 3776 ptr_idxs = recv_buffer_idxs_local; 3777 if (n_recvs) { 3778 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3779 } 3780 for (i=0;i<n_recvs;i++) { 3781 PetscInt j; 3782 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3783 for (j=0;j<*(ptr_idxs+1);j++) { 3784 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3785 } 3786 } else { 3787 /* TODO */ 3788 } 3789 ptr_idxs += olengths_idxs[i]; 3790 } 3791 if (new_local_nnz) { 3792 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3793 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3794 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3795 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3796 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3797 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3798 } else { 3799 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3800 } 3801 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3802 } else { 3803 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3804 } 3805 3806 /* set values */ 3807 ptr_vals = recv_buffer_vals; 3808 ptr_idxs = recv_buffer_idxs_local; 3809 for (i=0;i<n_recvs;i++) { 3810 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3811 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3812 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3813 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3814 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3815 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3816 } else { 3817 /* TODO */ 3818 } 3819 ptr_idxs += olengths_idxs[i]; 3820 ptr_vals += olengths_vals[i]; 3821 } 3822 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3823 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3824 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3825 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3826 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3827 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3828 3829 #if 0 3830 if (!restrict_comm) { /* check */ 3831 Vec lvec,rvec; 3832 PetscReal infty_error; 3833 3834 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3835 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3836 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3837 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3838 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3839 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3840 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3841 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3842 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3843 } 3844 #endif 3845 3846 /* assemble new additional is (if any) */ 3847 if (nis) { 3848 PetscInt **temp_idxs,*count_is,j,psum; 3849 3850 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3851 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3852 ptr_idxs = recv_buffer_idxs_is; 3853 psum = 0; 3854 for (i=0;i<n_recvs;i++) { 3855 for (j=0;j<nis;j++) { 3856 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3857 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3858 psum += plen; 3859 ptr_idxs += plen+1; /* shift pointer to received data */ 3860 } 3861 } 3862 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3863 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3864 for (i=1;i<nis;i++) { 3865 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3866 } 3867 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3868 ptr_idxs = recv_buffer_idxs_is; 3869 for (i=0;i<n_recvs;i++) { 3870 for (j=0;j<nis;j++) { 3871 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3872 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3873 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3874 ptr_idxs += plen+1; /* shift pointer to received data */ 3875 } 3876 } 3877 for (i=0;i<nis;i++) { 3878 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3879 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3880 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3881 } 3882 ierr = PetscFree(count_is);CHKERRQ(ierr); 3883 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3884 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3885 } 3886 /* free workspace */ 3887 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3888 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3889 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3890 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3891 if (isdense) { 3892 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3893 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3894 } else { 3895 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3896 } 3897 if (nis) { 3898 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3899 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3900 } 3901 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3902 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3903 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3904 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3905 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3906 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3907 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3908 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3909 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3910 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3911 ierr = PetscFree(onodes);CHKERRQ(ierr); 3912 if (nis) { 3913 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3914 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3915 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3916 } 3917 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3918 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3919 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3920 for (i=0;i<nis;i++) { 3921 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3922 } 3923 } 3924 PetscFunctionReturn(0); 3925 } 3926 3927 /* temporary hack into ksp private data structure */ 3928 #include <petsc/private/kspimpl.h> 3929 3930 #undef __FUNCT__ 3931 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3932 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3933 { 3934 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3935 PC_IS *pcis = (PC_IS*)pc->data; 3936 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3937 MatNullSpace CoarseNullSpace=NULL; 3938 ISLocalToGlobalMapping coarse_islg; 3939 IS coarse_is,*isarray; 3940 PetscInt i,im_active=-1,active_procs=-1; 3941 PetscInt nis,nisdofs,nisneu; 3942 PC pc_temp; 3943 PCType coarse_pc_type; 3944 KSPType coarse_ksp_type; 3945 PetscBool multilevel_requested,multilevel_allowed; 3946 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3947 Mat t_coarse_mat_is; 3948 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3949 PetscMPIInt all_procs; 3950 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3951 PetscBool compute_vecs = PETSC_FALSE; 3952 PetscScalar *array; 3953 PetscErrorCode ierr; 3954 3955 PetscFunctionBegin; 3956 /* Assign global numbering to coarse dofs */ 3957 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3958 PetscInt ocoarse_size; 3959 compute_vecs = PETSC_TRUE; 3960 ocoarse_size = pcbddc->coarse_size; 3961 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3962 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3963 /* see if we can avoid some work */ 3964 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3965 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3966 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3967 coarse_reuse = PETSC_FALSE; 3968 } else { /* we can safely reuse already computed coarse matrix */ 3969 coarse_reuse = PETSC_TRUE; 3970 } 3971 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3972 coarse_reuse = PETSC_FALSE; 3973 } 3974 /* reset any subassembling information */ 3975 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3976 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3977 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3978 coarse_reuse = PETSC_TRUE; 3979 } 3980 3981 /* count "active" (i.e. with positive local size) and "void" processes */ 3982 im_active = !!(pcis->n); 3983 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3984 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3985 void_procs = all_procs-active_procs; 3986 csin_type_simple = PETSC_TRUE; 3987 redist = PETSC_FALSE; 3988 if (pcbddc->current_level && void_procs) { 3989 csin_ml = PETSC_TRUE; 3990 ncoarse_ml = void_procs; 3991 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3992 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3993 csin_ds = PETSC_TRUE; 3994 ncoarse_ds = pcbddc->redistribute_coarse; 3995 redist = PETSC_TRUE; 3996 } else { 3997 csin_ds = PETSC_TRUE; 3998 ncoarse_ds = active_procs; 3999 redist = PETSC_TRUE; 4000 } 4001 } else { 4002 csin_ml = PETSC_FALSE; 4003 ncoarse_ml = all_procs; 4004 if (void_procs) { 4005 csin_ds = PETSC_TRUE; 4006 ncoarse_ds = void_procs; 4007 csin_type_simple = PETSC_FALSE; 4008 } else { 4009 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4010 csin_ds = PETSC_TRUE; 4011 ncoarse_ds = pcbddc->redistribute_coarse; 4012 redist = PETSC_TRUE; 4013 } else { 4014 csin_ds = PETSC_FALSE; 4015 ncoarse_ds = all_procs; 4016 } 4017 } 4018 } 4019 4020 /* 4021 test if we can go multilevel: three conditions must be satisfied: 4022 - we have not exceeded the number of levels requested 4023 - we can actually subassemble the active processes 4024 - we can find a suitable number of MPI processes where we can place the subassembled problem 4025 */ 4026 multilevel_allowed = PETSC_FALSE; 4027 multilevel_requested = PETSC_FALSE; 4028 if (pcbddc->current_level < pcbddc->max_levels) { 4029 multilevel_requested = PETSC_TRUE; 4030 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4031 multilevel_allowed = PETSC_FALSE; 4032 } else { 4033 multilevel_allowed = PETSC_TRUE; 4034 } 4035 } 4036 /* determine number of process partecipating to coarse solver */ 4037 if (multilevel_allowed) { 4038 ncoarse = ncoarse_ml; 4039 csin = csin_ml; 4040 redist = PETSC_FALSE; 4041 } else { 4042 ncoarse = ncoarse_ds; 4043 csin = csin_ds; 4044 } 4045 4046 /* creates temporary l2gmap and IS for coarse indexes */ 4047 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4048 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4049 4050 /* creates temporary MATIS object for coarse matrix */ 4051 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4052 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4053 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4054 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4055 #if 0 4056 { 4057 PetscViewer viewer; 4058 char filename[256]; 4059 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4060 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4061 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4062 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4063 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4064 } 4065 #endif 4066 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4067 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4068 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4069 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4070 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4071 4072 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4073 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4074 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4075 const PetscInt *idxs; 4076 ISLocalToGlobalMapping tmap; 4077 4078 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4079 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4080 /* allocate space for temporary storage */ 4081 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4082 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4083 /* allocate for IS array */ 4084 nisdofs = pcbddc->n_ISForDofsLocal; 4085 nisneu = !!pcbddc->NeumannBoundariesLocal; 4086 nis = nisdofs + nisneu; 4087 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4088 /* dofs splitting */ 4089 for (i=0;i<nisdofs;i++) { 4090 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4091 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4092 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4093 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4094 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4095 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4096 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4097 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4098 } 4099 /* neumann boundaries */ 4100 if (pcbddc->NeumannBoundariesLocal) { 4101 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4102 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4103 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4104 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4105 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4106 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4107 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4108 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4109 } 4110 /* free memory */ 4111 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4112 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4113 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4114 } else { 4115 nis = 0; 4116 nisdofs = 0; 4117 nisneu = 0; 4118 isarray = NULL; 4119 } 4120 /* destroy no longer needed map */ 4121 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4122 4123 /* restrict on coarse candidates (if needed) */ 4124 coarse_mat_is = NULL; 4125 if (csin) { 4126 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4127 if (redist) { 4128 PetscMPIInt rank; 4129 PetscInt spc,n_spc_p1,dest[1],destsize; 4130 4131 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4132 spc = active_procs/ncoarse; 4133 n_spc_p1 = active_procs%ncoarse; 4134 if (im_active) { 4135 destsize = 1; 4136 if (rank > n_spc_p1*(spc+1)-1) { 4137 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4138 } else { 4139 dest[0] = rank/(spc+1); 4140 } 4141 } else { 4142 destsize = 0; 4143 } 4144 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4145 } else if (csin_type_simple) { 4146 PetscMPIInt rank; 4147 PetscInt issize,isidx; 4148 4149 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4150 if (im_active) { 4151 issize = 1; 4152 isidx = (PetscInt)rank; 4153 } else { 4154 issize = 0; 4155 isidx = -1; 4156 } 4157 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4158 } else { /* get a suitable subassembling pattern from MATIS code */ 4159 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4160 } 4161 4162 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4163 if (!redist || ncoarse <= void_procs) { 4164 PetscInt ncoarse_cand,tissize,*nisindices; 4165 PetscInt *coarse_candidates; 4166 const PetscInt* tisindices; 4167 4168 /* get coarse candidates' ranks in pc communicator */ 4169 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4170 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4171 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4172 if (!coarse_candidates[i]) { 4173 coarse_candidates[ncoarse_cand++]=i; 4174 } 4175 } 4176 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4177 4178 4179 if (pcbddc->dbg_flag) { 4180 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4181 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4182 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4183 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4184 for (i=0;i<ncoarse_cand;i++) { 4185 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4186 } 4187 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4188 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4189 } 4190 /* shift the pattern on coarse candidates */ 4191 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4192 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4193 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4194 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4195 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4196 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4197 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4198 } 4199 if (pcbddc->dbg_flag) { 4200 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4201 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4202 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4203 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4204 } 4205 } 4206 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4207 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4208 } else { 4209 if (pcbddc->dbg_flag) { 4210 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4211 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4212 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4213 } 4214 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4215 coarse_mat_is = t_coarse_mat_is; 4216 } 4217 4218 /* create local to global scatters for coarse problem */ 4219 if (compute_vecs) { 4220 PetscInt lrows; 4221 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4222 if (coarse_mat_is) { 4223 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4224 } else { 4225 lrows = 0; 4226 } 4227 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4228 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4229 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4230 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4231 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4232 } 4233 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4234 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4235 4236 /* set defaults for coarse KSP and PC */ 4237 if (multilevel_allowed) { 4238 coarse_ksp_type = KSPRICHARDSON; 4239 coarse_pc_type = PCBDDC; 4240 } else { 4241 coarse_ksp_type = KSPPREONLY; 4242 coarse_pc_type = PCREDUNDANT; 4243 } 4244 4245 /* print some info if requested */ 4246 if (pcbddc->dbg_flag) { 4247 if (!multilevel_allowed) { 4248 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4249 if (multilevel_requested) { 4250 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4251 } else if (pcbddc->max_levels) { 4252 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4253 } 4254 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4255 } 4256 } 4257 4258 /* create the coarse KSP object only once with defaults */ 4259 if (coarse_mat_is) { 4260 MatReuse coarse_mat_reuse; 4261 PetscViewer dbg_viewer = NULL; 4262 if (pcbddc->dbg_flag) { 4263 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4264 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4265 } 4266 if (!pcbddc->coarse_ksp) { 4267 char prefix[256],str_level[16]; 4268 size_t len; 4269 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4270 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4271 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4272 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4273 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4274 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4275 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4276 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4277 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4278 /* prefix */ 4279 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4280 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4281 if (!pcbddc->current_level) { 4282 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4283 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4284 } else { 4285 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4286 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4287 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4288 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4289 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4290 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4291 } 4292 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4293 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4294 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4295 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4296 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4297 /* allow user customization */ 4298 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4299 } 4300 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4301 if (nisdofs) { 4302 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4303 for (i=0;i<nisdofs;i++) { 4304 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4305 } 4306 } 4307 if (nisneu) { 4308 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4309 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4310 } 4311 4312 /* get some info after set from options */ 4313 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4314 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4315 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4316 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4317 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4318 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4319 isbddc = PETSC_FALSE; 4320 } 4321 if (isredundant) { 4322 KSP inner_ksp; 4323 PC inner_pc; 4324 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4325 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4326 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4327 } 4328 4329 /* assemble coarse matrix */ 4330 if (coarse_reuse) { 4331 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4332 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4333 coarse_mat_reuse = MAT_REUSE_MATRIX; 4334 } else { 4335 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4336 } 4337 if (isbddc || isnn) { 4338 if (pcbddc->coarsening_ratio > 1) { 4339 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4340 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4341 if (pcbddc->dbg_flag) { 4342 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4343 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4344 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4345 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4346 } 4347 } 4348 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4349 } else { 4350 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4351 coarse_mat = coarse_mat_is; 4352 } 4353 } else { 4354 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4355 } 4356 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4357 4358 /* propagate symmetry info of coarse matrix */ 4359 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4360 if (pc->pmat->symmetric_set) { 4361 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4362 } 4363 if (pc->pmat->hermitian_set) { 4364 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4365 } 4366 if (pc->pmat->spd_set) { 4367 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4368 } 4369 /* set operators */ 4370 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4371 if (pcbddc->dbg_flag) { 4372 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4373 } 4374 } else { /* processes non partecipating to coarse solver (if any) */ 4375 coarse_mat = 0; 4376 } 4377 ierr = PetscFree(isarray);CHKERRQ(ierr); 4378 #if 0 4379 { 4380 PetscViewer viewer; 4381 char filename[256]; 4382 sprintf(filename,"coarse_mat.m"); 4383 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4384 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4385 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4386 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4387 } 4388 #endif 4389 4390 /* Compute coarse null space (special handling by BDDC only) */ 4391 if (pcbddc->NullSpace) { 4392 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4393 } 4394 4395 if (pcbddc->coarse_ksp) { 4396 Vec crhs,csol; 4397 PetscBool ispreonly; 4398 if (CoarseNullSpace) { 4399 if (isbddc) { 4400 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4401 } else { 4402 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4403 } 4404 } 4405 /* setup coarse ksp */ 4406 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4407 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4408 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4409 /* hack */ 4410 if (!csol) { 4411 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4412 } 4413 if (!crhs) { 4414 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4415 } 4416 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4417 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4418 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4419 KSP check_ksp; 4420 KSPType check_ksp_type; 4421 PC check_pc; 4422 Vec check_vec,coarse_vec; 4423 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4424 PetscInt its; 4425 PetscBool compute_eigs; 4426 PetscReal *eigs_r,*eigs_c; 4427 PetscInt neigs; 4428 const char *prefix; 4429 4430 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4431 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4432 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4433 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4434 if (ispreonly) { 4435 check_ksp_type = KSPPREONLY; 4436 compute_eigs = PETSC_FALSE; 4437 } else { 4438 check_ksp_type = KSPGMRES; 4439 compute_eigs = PETSC_TRUE; 4440 } 4441 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4442 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4443 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4444 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4445 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4446 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4447 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4448 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4449 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4450 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4451 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4452 /* create random vec */ 4453 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4454 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4455 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4456 if (CoarseNullSpace) { 4457 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4458 } 4459 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4460 /* solve coarse problem */ 4461 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4462 if (CoarseNullSpace) { 4463 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4464 } 4465 /* set eigenvalue estimation if preonly has not been requested */ 4466 if (compute_eigs) { 4467 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4468 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4469 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4470 lambda_max = eigs_r[neigs-1]; 4471 lambda_min = eigs_r[0]; 4472 if (pcbddc->use_coarse_estimates) { 4473 if (lambda_max>lambda_min) { 4474 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4475 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4476 } 4477 } 4478 } 4479 4480 /* check coarse problem residual error */ 4481 if (pcbddc->dbg_flag) { 4482 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4483 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4484 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4485 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4486 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4487 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4488 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4489 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4490 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4491 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4492 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4493 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4494 if (compute_eigs) { 4495 PetscReal lambda_max_s,lambda_min_s; 4496 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4497 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4498 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4499 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4500 for (i=0;i<neigs;i++) { 4501 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4502 } 4503 } 4504 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4505 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4506 } 4507 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4508 if (compute_eigs) { 4509 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4510 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4511 } 4512 } 4513 } 4514 /* print additional info */ 4515 if (pcbddc->dbg_flag) { 4516 /* waits until all processes reaches this point */ 4517 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4518 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4519 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4520 } 4521 4522 /* free memory */ 4523 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4524 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4525 PetscFunctionReturn(0); 4526 } 4527 4528 #undef __FUNCT__ 4529 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4530 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4531 { 4532 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4533 PC_IS* pcis = (PC_IS*)pc->data; 4534 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4535 PetscInt i,local_size,coarse_size=0; 4536 PetscInt *local_primal_indices=NULL; 4537 PetscInt *t_local_primal_indices=NULL; 4538 PetscErrorCode ierr; 4539 4540 PetscFunctionBegin; 4541 /* Compute global number of coarse dofs */ 4542 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4543 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4544 } 4545 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr); 4546 ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr); 4547 local_size = 0; 4548 for (i=0;i<pcbddc->local_primal_size_cc;i++) { 4549 PetscInt j; 4550 for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j; 4551 } 4552 ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr); 4553 if (local_size != pcbddc->local_primal_size) { 4554 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4555 } 4556 4557 /* check numbering */ 4558 if (pcbddc->dbg_flag) { 4559 PetscScalar coarsesum,*array; 4560 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4561 4562 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4563 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4564 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4565 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4566 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4567 for (i=0;i<pcbddc->local_primal_size;i++) { 4568 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4569 } 4570 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4571 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4572 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4573 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4574 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4575 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4576 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4577 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4578 for (i=0;i<pcis->n;i++) { 4579 if (array[i] == 1.0) { 4580 set_error = PETSC_TRUE; 4581 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4582 } 4583 } 4584 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4585 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4586 for (i=0;i<pcis->n;i++) { 4587 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4588 } 4589 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4590 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4591 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4592 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4593 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4594 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4595 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4596 PetscInt *gidxs; 4597 4598 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4599 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4600 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4601 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4602 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4603 for (i=0;i<pcbddc->local_primal_size;i++) { 4604 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4605 } 4606 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4607 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4608 } 4609 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4610 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4611 } 4612 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4613 /* get back data */ 4614 *coarse_size_n = coarse_size; 4615 *local_primal_indices_n = local_primal_indices; 4616 PetscFunctionReturn(0); 4617 } 4618 4619 #undef __FUNCT__ 4620 #define __FUNCT__ "PCBDDCGlobalToLocal" 4621 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4622 { 4623 IS localis_t; 4624 PetscInt i,lsize,*idxs,n; 4625 PetscScalar *vals; 4626 PetscErrorCode ierr; 4627 4628 PetscFunctionBegin; 4629 /* get indices in local ordering exploiting local to global map */ 4630 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4631 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4632 for (i=0;i<lsize;i++) vals[i] = 1.0; 4633 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4634 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4635 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4636 if (idxs) { /* multilevel guard */ 4637 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4638 } 4639 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4640 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4641 ierr = PetscFree(vals);CHKERRQ(ierr); 4642 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4643 /* now compute set in local ordering */ 4644 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4645 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4646 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4647 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4648 for (i=0,lsize=0;i<n;i++) { 4649 if (PetscRealPart(vals[i]) > 0.5) { 4650 lsize++; 4651 } 4652 } 4653 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4654 for (i=0,lsize=0;i<n;i++) { 4655 if (PetscRealPart(vals[i]) > 0.5) { 4656 idxs[lsize++] = i; 4657 } 4658 } 4659 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4660 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4661 *localis = localis_t; 4662 PetscFunctionReturn(0); 4663 } 4664 4665 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4666 #undef __FUNCT__ 4667 #define __FUNCT__ "PCBDDCMatMult_Private" 4668 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4669 { 4670 PCBDDCChange_ctx change_ctx; 4671 PetscErrorCode ierr; 4672 4673 PetscFunctionBegin; 4674 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4675 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4676 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4677 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4678 PetscFunctionReturn(0); 4679 } 4680 4681 #undef __FUNCT__ 4682 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4683 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4684 { 4685 PCBDDCChange_ctx change_ctx; 4686 PetscErrorCode ierr; 4687 4688 PetscFunctionBegin; 4689 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4690 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4691 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4692 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4693 PetscFunctionReturn(0); 4694 } 4695 4696 #undef __FUNCT__ 4697 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4698 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4699 { 4700 PC_IS *pcis=(PC_IS*)pc->data; 4701 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4702 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4703 Mat S_j; 4704 PetscInt *used_xadj,*used_adjncy; 4705 PetscBool free_used_adj; 4706 PetscErrorCode ierr; 4707 4708 PetscFunctionBegin; 4709 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4710 free_used_adj = PETSC_FALSE; 4711 if (pcbddc->sub_schurs_layers == -1) { 4712 used_xadj = NULL; 4713 used_adjncy = NULL; 4714 } else { 4715 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4716 used_xadj = pcbddc->mat_graph->xadj; 4717 used_adjncy = pcbddc->mat_graph->adjncy; 4718 } else if (pcbddc->computed_rowadj) { 4719 used_xadj = pcbddc->mat_graph->xadj; 4720 used_adjncy = pcbddc->mat_graph->adjncy; 4721 } else { 4722 PetscBool flg_row=PETSC_FALSE; 4723 const PetscInt *xadj,*adjncy; 4724 PetscInt nvtxs; 4725 4726 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4727 if (flg_row) { 4728 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4729 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4730 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4731 free_used_adj = PETSC_TRUE; 4732 } else { 4733 pcbddc->sub_schurs_layers = -1; 4734 used_xadj = NULL; 4735 used_adjncy = NULL; 4736 } 4737 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4738 } 4739 } 4740 4741 /* setup sub_schurs data */ 4742 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4743 if (!sub_schurs->use_mumps) { 4744 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4745 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4746 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4747 } else { 4748 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4749 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4750 } 4751 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4752 4753 /* free adjacency */ 4754 if (free_used_adj) { 4755 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4756 } 4757 PetscFunctionReturn(0); 4758 } 4759 4760 #undef __FUNCT__ 4761 #define __FUNCT__ "PCBDDCInitSubSchurs" 4762 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4763 { 4764 PC_IS *pcis=(PC_IS*)pc->data; 4765 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4766 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4767 PCBDDCGraph graph; 4768 PetscErrorCode ierr; 4769 4770 PetscFunctionBegin; 4771 /* attach interface graph for determining subsets */ 4772 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4773 IS verticesIS,verticescomm; 4774 PetscInt vsize,*idxs; 4775 4776 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4777 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4778 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4779 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4780 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4781 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4782 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4783 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4784 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4785 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4786 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4787 /* 4788 if (pcbddc->dbg_flag) { 4789 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4790 } 4791 */ 4792 } else { 4793 graph = pcbddc->mat_graph; 4794 } 4795 4796 /* sub_schurs init */ 4797 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4798 4799 /* free graph struct */ 4800 if (pcbddc->sub_schurs_rebuild) { 4801 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4802 } 4803 PetscFunctionReturn(0); 4804 } 4805