1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 60 nmax = PetscMax(nmin,nmax); 61 allocated_S_St = PETSC_FALSE; 62 if (nmin) { 63 allocated_S_St = PETSC_TRUE; 64 } 65 66 /* allocate lapack workspace */ 67 cum = cum2 = 0; 68 maxneigs = 0; 69 for (i=0;i<sub_schurs->n_subs;i++) { 70 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 71 PetscInt n,subset_size; 72 73 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 74 n = PetscMin(subset_size,nmax); 75 cum += subset_size; 76 cum2 += subset_size*n; 77 maxneigs = PetscMax(maxneigs,n); 78 } 79 } 80 if (mss) { 81 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 82 PetscBLASInt B_itype = 1; 83 PetscBLASInt B_N = mss; 84 PetscReal zero = 0.0; 85 PetscReal eps = 0.0; /* dlamch? */ 86 87 B_lwork = -1; 88 S = NULL; 89 St = NULL; 90 eigs = NULL; 91 eigv = NULL; 92 B_iwork = NULL; 93 B_ifail = NULL; 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 PetscInt j,subset_size; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 158 const PetscInt *idxs; 159 PetscReal infty = PETSC_MAX_REAL; 160 PetscInt eigs_start = 0; 161 PetscBLASInt B_N; 162 PetscBool same_data = PETSC_FALSE; 163 164 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 165 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 166 if (sub_schurs->is_hermitian) { 167 PetscInt j,k; 168 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 169 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 170 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 171 } 172 for (j=0;j<subset_size;j++) { 173 for (k=j;k<subset_size;k++) { 174 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 175 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 176 } 177 } 178 } else { 179 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 180 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 181 } 182 } else { 183 S = Sarray + cumarray; 184 St = Starray + cumarray; 185 } 186 187 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 188 /* see if we can save some work */ 189 if (sub_schurs->n_subs == 1) { 190 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 191 } 192 193 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 194 PetscPrintf(PETSC_COMM_SELF,"[%d] SAME DATA!!!!!!!!!!\n",PetscGlobalRank); 195 B_neigs = 0; 196 } else { 197 /* Threshold: this is an heuristic for edges */ 198 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 199 200 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 201 PetscBLASInt B_itype = 1; 202 PetscBLASInt B_IL, B_IU; 203 PetscReal eps = -1.0; /* dlamch? */ 204 PetscInt nmin_s; 205 206 /* ask for eigenvalues larger than thresh */ 207 if (pcbddc->dbg_flag) { 208 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 209 } 210 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 211 #if defined(PETSC_USE_COMPLEX) 212 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 213 #else 214 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 215 #endif 216 ierr = PetscFPTrapPop();CHKERRQ(ierr); 217 if (B_ierr) { 218 if (B_ierr < 0 ) { 219 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 220 } else if (B_ierr <= B_N) { 221 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 222 } else { 223 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 224 } 225 } 226 227 if (B_neigs > nmax) { 228 if (pcbddc->dbg_flag) { 229 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 230 } 231 eigs_start = B_neigs -nmax; 232 B_neigs = nmax; 233 } 234 235 nmin_s = PetscMin(nmin,B_N); 236 if (B_neigs < nmin_s) { 237 PetscBLASInt B_neigs2; 238 239 B_IU = B_N - B_neigs; 240 B_IL = B_N - nmin_s + 1; 241 if (pcbddc->dbg_flag) { 242 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 243 } 244 if (sub_schurs->is_hermitian) { 245 PetscInt j; 246 for (j=0;j<subset_size;j++) { 247 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 248 } 249 for (j=0;j<subset_size;j++) { 250 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 251 } 252 } else { 253 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 254 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 255 } 256 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 257 #if defined(PETSC_USE_COMPLEX) 258 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 259 #else 260 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 261 #endif 262 ierr = PetscFPTrapPop();CHKERRQ(ierr); 263 B_neigs += B_neigs2; 264 } 265 if (B_ierr) { 266 if (B_ierr < 0 ) { 267 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 268 } else if (B_ierr <= B_N) { 269 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 270 } else { 271 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 272 } 273 } 274 if (pcbddc->dbg_flag) { 275 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 276 for (j=0;j<B_neigs;j++) { 277 if (eigs[j] == 0.0) { 278 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 279 } else { 280 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 281 } 282 } 283 } 284 } else { 285 /* TODO */ 286 } 287 } 288 maxneigs = PetscMax(B_neigs,maxneigs); 289 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 290 if (B_neigs) { 291 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 292 293 if (pcbddc->dbg_flag > 1) { 294 PetscInt ii; 295 for (ii=0;ii<B_neigs;ii++) { 296 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 297 for (j=0;j<B_N;j++) { 298 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 299 } 300 } 301 } 302 #if 0 303 for (j=0;j<B_neigs;j++) { 304 PetscBLASInt Blas_N,Blas_one = 1.0; 305 PetscScalar norm; 306 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 307 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 308 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 309 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 310 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 311 } else { 312 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 313 } 314 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 315 } 316 #endif 317 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 318 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 319 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 320 cum++; 321 } 322 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 323 } 324 /* shift for next computation */ 325 cumarray += subset_size*subset_size; 326 } 327 if (pcbddc->dbg_flag) { 328 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 329 } 330 331 if (mss) { 332 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 333 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 334 /* destroy matrices (junk) */ 335 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 336 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 337 } 338 if (allocated_S_St) { 339 ierr = PetscFree2(S,St);CHKERRQ(ierr); 340 } 341 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 342 #if defined(PETSC_USE_COMPLEX) 343 ierr = PetscFree(rwork);CHKERRQ(ierr); 344 #endif 345 if (pcbddc->dbg_flag) { 346 PetscInt maxneigs_r; 347 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 348 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 349 } 350 PetscFunctionReturn(0); 351 } 352 353 #undef __FUNCT__ 354 #define __FUNCT__ "PCBDDCSetUpSolvers" 355 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 356 { 357 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 358 PetscScalar *coarse_submat_vals; 359 PetscErrorCode ierr; 360 361 PetscFunctionBegin; 362 /* Setup local scatters R_to_B and (optionally) R_to_D */ 363 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 364 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 365 366 /* Setup local neumann solver ksp_R */ 367 /* PCBDDCSetUpLocalScatters should be called first! */ 368 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 369 370 /* Change global null space passed in by the user if change of basis has been requested */ 371 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 372 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 373 } 374 375 /* 376 Setup local correction and local part of coarse basis. 377 Gives back the dense local part of the coarse matrix in column major ordering 378 */ 379 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 380 381 /* Compute total number of coarse nodes and setup coarse solver */ 382 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 383 384 /* free */ 385 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "PCBDDCResetCustomization" 391 PetscErrorCode PCBDDCResetCustomization(PC pc) 392 { 393 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 394 PetscErrorCode ierr; 395 396 PetscFunctionBegin; 397 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 398 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 399 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 400 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 401 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 402 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 403 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 404 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 405 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 406 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 407 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 408 PetscFunctionReturn(0); 409 } 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "PCBDDCResetTopography" 413 PetscErrorCode PCBDDCResetTopography(PC pc) 414 { 415 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 416 PetscErrorCode ierr; 417 418 PetscFunctionBegin; 419 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 421 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 422 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 423 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "PCBDDCResetSolvers" 429 PetscErrorCode PCBDDCResetSolvers(PC pc) 430 { 431 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 432 PetscScalar *array; 433 PetscErrorCode ierr; 434 435 PetscFunctionBegin; 436 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 437 if (pcbddc->coarse_phi_B) { 438 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 439 ierr = PetscFree(array);CHKERRQ(ierr); 440 } 441 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 442 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 443 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 445 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 446 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 447 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 448 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 449 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 450 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 451 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 452 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 453 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 454 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 455 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 456 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 457 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 458 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 459 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 460 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 461 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 462 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 463 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 469 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 470 { 471 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 472 PC_IS *pcis = (PC_IS*)pc->data; 473 VecType impVecType; 474 PetscInt n_constraints,n_R,old_size; 475 PetscErrorCode ierr; 476 477 PetscFunctionBegin; 478 if (!pcbddc->ConstraintMatrix) { 479 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 480 } 481 /* get sizes */ 482 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 483 n_R = pcis->n-pcbddc->n_vertices; 484 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 485 /* local work vectors (try to avoid unneeded work)*/ 486 /* R nodes */ 487 old_size = -1; 488 if (pcbddc->vec1_R) { 489 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 490 } 491 if (n_R != old_size) { 492 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 493 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 494 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 495 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 496 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 497 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 498 } 499 /* local primal dofs */ 500 old_size = -1; 501 if (pcbddc->vec1_P) { 502 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 503 } 504 if (pcbddc->local_primal_size != old_size) { 505 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 506 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 507 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 508 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 509 } 510 /* local explicit constraints */ 511 old_size = -1; 512 if (pcbddc->vec1_C) { 513 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 514 } 515 if (n_constraints && n_constraints != old_size) { 516 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 517 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 518 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 519 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 520 } 521 PetscFunctionReturn(0); 522 } 523 524 #undef __FUNCT__ 525 #define __FUNCT__ "PCBDDCSetUpCorrection" 526 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 527 { 528 PetscErrorCode ierr; 529 /* pointers to pcis and pcbddc */ 530 PC_IS* pcis = (PC_IS*)pc->data; 531 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 532 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 533 /* submatrices of local problem */ 534 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 535 /* submatrices of local coarse problem */ 536 Mat S_VV,S_CV,S_VC,S_CC; 537 /* working matrices */ 538 Mat C_CR; 539 /* additional working stuff */ 540 PC pc_R; 541 Mat F; 542 PetscBool isLU,isCHOL,isILU; 543 544 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 545 PetscScalar *work; 546 PetscInt *idx_V_B; 547 PetscInt n,n_vertices,n_constraints; 548 PetscInt i,n_R,n_D,n_B; 549 PetscBool unsymmetric_check; 550 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 551 MatType impMatType; 552 /* some shortcuts to scalars */ 553 PetscScalar one=1.0,m_one=-1.0; 554 555 PetscFunctionBegin; 556 n_vertices = pcbddc->n_vertices; 557 n_constraints = pcbddc->local_primal_size-n_vertices; 558 /* Set Non-overlapping dimensions */ 559 n_B = pcis->n_B; 560 n_D = pcis->n - n_B; 561 n_R = pcis->n - n_vertices; 562 563 /* Set types for local objects needed by BDDC precondtioner */ 564 impMatType = MATSEQDENSE; 565 566 /* vertices in boundary numbering */ 567 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 568 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 569 if (i != n_vertices) { 570 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 571 } 572 573 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 574 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 575 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 576 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 577 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 578 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 579 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 580 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 581 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 582 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 583 584 unsymmetric_check = PETSC_FALSE; 585 /* allocate workspace */ 586 n = 0; 587 if (n_constraints) { 588 n += n_R*n_constraints; 589 } 590 if (n_vertices) { 591 n = PetscMax(2*n_R*n_vertices,n); 592 n = PetscMax((n_R+n_B)*n_vertices,n); 593 } 594 if (!pcbddc->issym) { 595 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 596 unsymmetric_check = PETSC_TRUE; 597 } 598 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 599 600 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 601 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 602 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 603 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 604 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 605 if (isLU || isILU || isCHOL) { 606 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 607 } else if (sub_schurs->reuse_mumps) { 608 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 609 MatFactorType type; 610 611 F = reuse_mumps->F; 612 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 613 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 614 } else { 615 F = NULL; 616 } 617 618 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 619 if (n_constraints) { 620 Mat M1,M2,M3; 621 Mat auxmat; 622 IS is_aux; 623 PetscScalar *array,*array2; 624 625 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 626 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 627 628 /* Extract constraints on R nodes: C_{CR} */ 629 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 630 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 631 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 632 633 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 634 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 635 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 636 for (i=0;i<n_constraints;i++) { 637 const PetscScalar *row_cmat_values; 638 const PetscInt *row_cmat_indices; 639 PetscInt size_of_constraint,j; 640 641 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 642 for (j=0;j<size_of_constraint;j++) { 643 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 644 } 645 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 646 } 647 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 648 if (F) { 649 Mat B; 650 651 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 652 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 653 ierr = MatDestroy(&B);CHKERRQ(ierr); 654 } else { 655 PetscScalar *marr; 656 657 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 658 for (i=0;i<n_constraints;i++) { 659 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 660 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 661 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 662 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 663 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 664 } 665 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 666 } 667 if (!pcbddc->switch_static) { 668 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 669 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 670 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 671 for (i=0;i<n_constraints;i++) { 672 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 673 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 674 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 675 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 676 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 677 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 678 } 679 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 680 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 681 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 682 } else { 683 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 684 pcbddc->local_auxmat2 = local_auxmat2_R; 685 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 686 } 687 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 688 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 689 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 690 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 691 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 692 if (isCHOL) { 693 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 694 } else { 695 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 696 } 697 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 698 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 699 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 700 ierr = MatDestroy(&M2);CHKERRQ(ierr); 701 ierr = MatDestroy(&M3);CHKERRQ(ierr); 702 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 703 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 704 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 705 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 706 ierr = MatDestroy(&M1);CHKERRQ(ierr); 707 } 708 /* Get submatrices from subdomain matrix */ 709 if (n_vertices) { 710 Mat newmat; 711 IS is_aux; 712 PetscInt ibs,mbs; 713 PetscBool issbaij; 714 715 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 716 IS tis; 717 718 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 719 ierr = ISSort(tis);CHKERRQ(ierr); 720 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 721 ierr = ISDestroy(&tis);CHKERRQ(ierr); 722 } else { 723 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 724 } 725 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 726 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 727 if (ibs != mbs) { /* need to convert to SEQAIJ */ 728 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 729 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 730 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 731 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 732 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 733 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 734 } else { 735 /* this is safe */ 736 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 737 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 738 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 739 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 740 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 741 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 742 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 743 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 744 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 745 } else { 746 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 747 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 748 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 749 } 750 /* need to convert to SeqAIJ for MatMatMult with dense matrix */ 751 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 752 } 753 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 754 } 755 756 /* Matrix of coarse basis functions (local) */ 757 if (pcbddc->coarse_phi_B) { 758 PetscInt on_B,on_primal,on_D=n_D; 759 if (pcbddc->coarse_phi_D) { 760 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 761 } 762 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 763 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 764 PetscScalar *marray; 765 766 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 767 ierr = PetscFree(marray);CHKERRQ(ierr); 768 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 769 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 770 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 771 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 772 } 773 } 774 775 if (!pcbddc->coarse_phi_B) { 776 PetscScalar *marray; 777 778 n = n_B*pcbddc->local_primal_size; 779 if (pcbddc->switch_static || pcbddc->dbg_flag) { 780 n += n_D*pcbddc->local_primal_size; 781 } 782 if (!pcbddc->issym) { 783 n *= 2; 784 } 785 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 786 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 787 n = n_B*pcbddc->local_primal_size; 788 if (pcbddc->switch_static || pcbddc->dbg_flag) { 789 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 790 n += n_D*pcbddc->local_primal_size; 791 } 792 if (!pcbddc->issym) { 793 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 794 if (pcbddc->switch_static || pcbddc->dbg_flag) { 795 n = n_B*pcbddc->local_primal_size; 796 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 797 } 798 } else { 799 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 800 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 801 if (pcbddc->switch_static || pcbddc->dbg_flag) { 802 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 803 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 804 } 805 } 806 } 807 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 808 /* vertices */ 809 if (n_vertices) { 810 811 if (n_R) { 812 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 813 PetscBLASInt B_N,B_one = 1; 814 PetscScalar *x,*y; 815 816 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 817 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 818 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 819 if (F) { /* TODO could be optimized for symmetric problems */ 820 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 821 } else { 822 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 823 for (i=0;i<n_vertices;i++) { 824 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 825 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 826 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 827 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 828 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 829 } 830 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 831 } 832 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 833 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 834 if (n_constraints) { 835 Mat B; 836 837 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 838 for (i=0;i<n_vertices;i++) { 839 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 840 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 841 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 844 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 845 } 846 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 847 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 848 ierr = MatDestroy(&B);CHKERRQ(ierr); 849 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 850 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 851 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 852 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 853 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 854 ierr = MatDestroy(&B);CHKERRQ(ierr); 855 } 856 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 857 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 858 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 859 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 860 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 861 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 862 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 863 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 864 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 865 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 866 } else { 867 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 868 } 869 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 870 /* coarse basis functions */ 871 for (i=0;i<n_vertices;i++) { 872 PetscScalar *y; 873 874 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 875 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 876 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 877 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 878 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 879 y[n_B*i+idx_V_B[i]] = 1.0; 880 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 881 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 882 883 if (pcbddc->switch_static || pcbddc->dbg_flag) { 884 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 885 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 886 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 888 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 889 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 890 } 891 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 892 } 893 } 894 895 if (n_constraints) { 896 Mat B; 897 898 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 899 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 900 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 901 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 902 if (n_vertices) { 903 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 904 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 905 } else { 906 Mat S_VCt; 907 908 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 909 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 910 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 911 } 912 } 913 ierr = MatDestroy(&B);CHKERRQ(ierr); 914 /* coarse basis functions */ 915 for (i=0;i<n_constraints;i++) { 916 PetscScalar *y; 917 918 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 919 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 920 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 921 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 923 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 924 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 925 if (pcbddc->switch_static || pcbddc->dbg_flag) { 926 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 927 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 928 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 929 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 930 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 931 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 932 } 933 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 934 } 935 } 936 if (n_constraints) { 937 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 938 } 939 940 /* compute other basis functions for non-symmetric problems */ 941 if (!pcbddc->issym) { 942 943 if (n_constraints) { 944 Mat S_CCT,B_C; 945 946 /* this is a lazy thing */ 947 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 948 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 949 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 950 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 951 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 952 if (n_vertices) { 953 Mat B_V,S_VCT; 954 955 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 956 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 957 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 958 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 959 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 960 } 961 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 962 } else { /* if there are no constraints, reset work */ 963 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 964 } 965 if (n_vertices && n_R) { 966 Mat A_VRT; 967 PetscScalar *marray; 968 PetscBLASInt B_N,B_one = 1; 969 970 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 971 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 972 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 973 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 974 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 975 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 976 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 977 } 978 979 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 980 for (i=0;i<pcbddc->local_primal_size;i++) { 981 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 982 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 983 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 984 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 985 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 986 } 987 } else { 988 for (i=0;i<pcbddc->local_primal_size;i++) { 989 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 990 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 991 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 992 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 993 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 994 } 995 } 996 /* coarse basis functions */ 997 for (i=0;i<pcbddc->local_primal_size;i++) { 998 PetscScalar *y; 999 1000 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1001 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1002 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1003 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 if (i<n_vertices) { 1006 y[n_B*i+idx_V_B[i]] = 1.0; 1007 } 1008 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1009 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1010 1011 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1012 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1013 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1014 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1015 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1016 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1017 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1018 } 1019 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1020 } 1021 } 1022 /* free memory */ 1023 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1024 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1025 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1026 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1027 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1028 ierr = PetscFree(work);CHKERRQ(ierr); 1029 if (n_vertices) { 1030 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1031 } 1032 if (n_constraints) { 1033 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1034 } 1035 /* Checking coarse_sub_mat and coarse basis functios */ 1036 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1037 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1038 if (pcbddc->dbg_flag) { 1039 Mat coarse_sub_mat; 1040 Mat AUXMAT,TM1,TM2,TM3,TM4; 1041 Mat coarse_phi_D,coarse_phi_B; 1042 Mat coarse_psi_D,coarse_psi_B; 1043 Mat A_II,A_BB,A_IB,A_BI; 1044 Mat C_B,CPHI; 1045 IS is_dummy; 1046 Vec mones; 1047 MatType checkmattype=MATSEQAIJ; 1048 PetscReal real_value; 1049 1050 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1051 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1052 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1053 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1054 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1055 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1056 if (unsymmetric_check) { 1057 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1058 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1059 } 1060 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1061 1062 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1063 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 1064 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1065 if (unsymmetric_check) { 1066 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1067 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1068 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1069 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1070 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1071 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1072 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1073 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1074 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1075 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1076 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1077 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1078 } else { 1079 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1080 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1081 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1082 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1083 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1084 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1085 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1086 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1087 } 1088 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1089 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1090 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1091 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1092 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1093 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1094 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1095 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1096 1097 /* check constraints */ 1098 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1099 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1100 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1101 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1102 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1103 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1104 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1105 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1106 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1107 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1108 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1109 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1110 1111 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1112 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1113 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1114 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1115 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1116 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1117 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1118 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1119 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1120 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1121 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1122 if (unsymmetric_check) { 1123 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1124 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1125 } 1126 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1127 } 1128 /* get back data */ 1129 *coarse_submat_vals_n = coarse_submat_vals; 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1135 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1136 { 1137 Mat *work_mat; 1138 IS isrow_s,iscol_s; 1139 PetscBool rsorted,csorted; 1140 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1141 PetscErrorCode ierr; 1142 1143 PetscFunctionBegin; 1144 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1145 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1146 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1147 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1148 1149 if (!rsorted) { 1150 const PetscInt *idxs; 1151 PetscInt *idxs_sorted,i; 1152 1153 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1154 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1155 for (i=0;i<rsize;i++) { 1156 idxs_perm_r[i] = i; 1157 } 1158 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1159 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1160 for (i=0;i<rsize;i++) { 1161 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1162 } 1163 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1164 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1165 } else { 1166 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1167 isrow_s = isrow; 1168 } 1169 1170 if (!csorted) { 1171 if (isrow == iscol) { 1172 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1173 iscol_s = isrow_s; 1174 } else { 1175 const PetscInt *idxs; 1176 PetscInt *idxs_sorted,i; 1177 1178 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1179 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1180 for (i=0;i<csize;i++) { 1181 idxs_perm_c[i] = i; 1182 } 1183 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1184 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1185 for (i=0;i<csize;i++) { 1186 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1187 } 1188 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1189 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1190 } 1191 } else { 1192 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1193 iscol_s = iscol; 1194 } 1195 1196 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1197 1198 if (!rsorted || !csorted) { 1199 Mat new_mat; 1200 IS is_perm_r,is_perm_c; 1201 1202 if (!rsorted) { 1203 PetscInt *idxs_r,i; 1204 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1205 for (i=0;i<rsize;i++) { 1206 idxs_r[idxs_perm_r[i]] = i; 1207 } 1208 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1209 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1210 } else { 1211 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1212 } 1213 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1214 1215 if (!csorted) { 1216 if (isrow_s == iscol_s) { 1217 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1218 is_perm_c = is_perm_r; 1219 } else { 1220 PetscInt *idxs_c,i; 1221 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1222 for (i=0;i<csize;i++) { 1223 idxs_c[idxs_perm_c[i]] = i; 1224 } 1225 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1226 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1227 } 1228 } else { 1229 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1230 } 1231 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1232 1233 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1234 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1235 work_mat[0] = new_mat; 1236 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1237 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1238 } 1239 1240 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1241 *B = work_mat[0]; 1242 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1243 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1244 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1250 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1251 { 1252 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1253 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1254 Mat new_mat; 1255 IS is_local,is_global; 1256 PetscInt local_size; 1257 PetscBool isseqaij; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1262 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1263 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1264 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1265 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1266 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1267 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1268 1269 /* check */ 1270 if (pcbddc->dbg_flag) { 1271 Vec x,x_change; 1272 PetscReal error; 1273 1274 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1275 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1276 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1277 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1278 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1279 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1280 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1281 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1282 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1283 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1284 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1285 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1286 ierr = VecDestroy(&x);CHKERRQ(ierr); 1287 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1288 } 1289 1290 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1291 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1292 if (isseqaij) { 1293 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1294 } else { 1295 Mat work_mat; 1296 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1297 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1298 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1299 } 1300 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1301 #if !defined(PETSC_USE_COMPLEX) 1302 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1303 #endif 1304 /* 1305 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1306 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1307 */ 1308 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1314 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1315 { 1316 PC_IS* pcis = (PC_IS*)(pc->data); 1317 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1318 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1319 PetscInt *idx_R_local=NULL; 1320 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1321 PetscInt vbs,bs; 1322 PetscBT bitmask=NULL; 1323 PetscErrorCode ierr; 1324 1325 PetscFunctionBegin; 1326 /* 1327 No need to setup local scatters if 1328 - primal space is unchanged 1329 AND 1330 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1331 AND 1332 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1333 */ 1334 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1335 PetscFunctionReturn(0); 1336 } 1337 /* destroy old objects */ 1338 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1339 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1340 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1341 /* Set Non-overlapping dimensions */ 1342 n_B = pcis->n_B; 1343 n_D = pcis->n - n_B; 1344 n_vertices = pcbddc->n_vertices; 1345 1346 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1347 1348 /* create auxiliary bitmask and allocate workspace */ 1349 if (!sub_schurs->reuse_mumps) { 1350 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1351 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1352 for (i=0;i<n_vertices;i++) { 1353 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1354 } 1355 1356 for (i=0, n_R=0; i<pcis->n; i++) { 1357 if (!PetscBTLookup(bitmask,i)) { 1358 idx_R_local[n_R++] = i; 1359 } 1360 } 1361 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1362 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1363 1364 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1365 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1366 } 1367 1368 /* Block code */ 1369 vbs = 1; 1370 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1371 if (bs>1 && !(n_vertices%bs)) { 1372 PetscBool is_blocked = PETSC_TRUE; 1373 PetscInt *vary; 1374 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1375 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1376 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1377 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1378 for (i=0; i<n_vertices/bs; i++) { 1379 if (vary[i]!=0 && vary[i]!=bs) { 1380 is_blocked = PETSC_FALSE; 1381 break; 1382 } 1383 } 1384 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1385 vbs = bs; 1386 for (i=0;i<n_R/vbs;i++) { 1387 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1388 } 1389 } 1390 ierr = PetscFree(vary);CHKERRQ(ierr); 1391 } 1392 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1393 if (sub_schurs->reuse_mumps) { 1394 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1395 1396 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1397 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1398 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1399 reuse_mumps->is_R = pcbddc->is_R_local; 1400 } else { 1401 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1402 } 1403 1404 /* print some info if requested */ 1405 if (pcbddc->dbg_flag) { 1406 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1407 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1408 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1409 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1412 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1413 } 1414 1415 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1416 if (!sub_schurs->reuse_mumps) { 1417 IS is_aux1,is_aux2; 1418 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1419 1420 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1421 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1422 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1423 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1424 for (i=0; i<n_D; i++) { 1425 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1426 } 1427 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1428 for (i=0, j=0; i<n_R; i++) { 1429 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1430 aux_array1[j++] = i; 1431 } 1432 } 1433 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1434 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1435 for (i=0, j=0; i<n_B; i++) { 1436 if (!PetscBTLookup(bitmask,is_indices[i])) { 1437 aux_array2[j++] = i; 1438 } 1439 } 1440 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1441 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1442 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1443 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1444 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1445 1446 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1447 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1448 for (i=0, j=0; i<n_R; i++) { 1449 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1450 aux_array1[j++] = i; 1451 } 1452 } 1453 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1454 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1455 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1456 } 1457 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1458 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1459 } else { 1460 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1461 IS tis; 1462 PetscInt schur_size; 1463 1464 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1465 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1466 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1467 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1468 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1469 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1470 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1471 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1472 } 1473 } 1474 PetscFunctionReturn(0); 1475 } 1476 1477 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1480 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1481 { 1482 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1483 PC_IS *pcis = (PC_IS*)pc->data; 1484 PC pc_temp; 1485 Mat A_RR; 1486 MatReuse reuse; 1487 PetscScalar m_one = -1.0; 1488 PetscReal value; 1489 PetscInt n_D,n_R,ibs,mbs; 1490 PetscBool use_exact,use_exact_reduced,issbaij; 1491 PetscErrorCode ierr; 1492 /* prefixes stuff */ 1493 char dir_prefix[256],neu_prefix[256],str_level[16]; 1494 size_t len; 1495 1496 PetscFunctionBegin; 1497 1498 /* compute prefixes */ 1499 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1500 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1501 if (!pcbddc->current_level) { 1502 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1503 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1504 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1505 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1506 } else { 1507 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1508 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1509 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1510 len -= 15; /* remove "pc_bddc_coarse_" */ 1511 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1512 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1513 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1514 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1515 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1516 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1517 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1518 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1519 } 1520 1521 /* DIRICHLET PROBLEM */ 1522 if (dirichlet) { 1523 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1524 if (pcbddc->issym) { 1525 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1526 } 1527 /* Matrix for Dirichlet problem is pcis->A_II */ 1528 n_D = pcis->n - pcis->n_B; 1529 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1530 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1531 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1532 /* default */ 1533 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1534 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1535 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1536 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1537 if (issbaij) { 1538 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1539 } else { 1540 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1541 } 1542 /* Allow user's customization */ 1543 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1544 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1545 } 1546 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1547 if (sub_schurs->reuse_mumps) { 1548 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1549 1550 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1551 } 1552 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1553 if (!n_D) { 1554 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1555 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1556 } 1557 /* Set Up KSP for Dirichlet problem of BDDC */ 1558 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1559 /* set ksp_D into pcis data */ 1560 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1561 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1562 pcis->ksp_D = pcbddc->ksp_D; 1563 } 1564 1565 /* NEUMANN PROBLEM */ 1566 A_RR = 0; 1567 if (neumann) { 1568 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1569 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1570 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1571 if (pcbddc->ksp_R) { /* already created ksp */ 1572 PetscInt nn_R; 1573 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1574 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1575 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1576 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1577 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1578 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1579 reuse = MAT_INITIAL_MATRIX; 1580 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1581 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1582 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1583 reuse = MAT_INITIAL_MATRIX; 1584 } else { /* safe to reuse the matrix */ 1585 reuse = MAT_REUSE_MATRIX; 1586 } 1587 } 1588 /* last check */ 1589 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1590 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1591 reuse = MAT_INITIAL_MATRIX; 1592 } 1593 } else { /* first time, so we need to create the matrix */ 1594 reuse = MAT_INITIAL_MATRIX; 1595 } 1596 /* extract A_RR */ 1597 if (!sub_schurs->reuse_mumps) { 1598 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1599 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1600 if (ibs != mbs) { 1601 Mat newmat; 1602 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1603 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1604 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1605 } else { 1606 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1607 } 1608 if (pcbddc->issym) { 1609 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1610 } 1611 } else { 1612 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1613 1614 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1615 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1616 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1617 } 1618 if (!pcbddc->ksp_R) { /* create object if not present */ 1619 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1620 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1621 /* default */ 1622 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1623 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1624 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1625 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1626 if (issbaij) { 1627 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1628 } else { 1629 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1630 } 1631 /* Allow user's customization */ 1632 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1633 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1634 } 1635 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1636 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1637 if (!n_R) { 1638 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1639 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1640 } 1641 /* Reuse MUMPS solver if it is present */ 1642 if (sub_schurs->reuse_mumps) { 1643 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1644 1645 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1646 } 1647 /* Set Up KSP for Neumann problem of BDDC */ 1648 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1649 } 1650 /* free Neumann problem's matrix */ 1651 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1652 1653 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1654 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1655 if (pcbddc->dbg_flag) { 1656 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1657 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1659 } 1660 if (dirichlet) { /* Dirichlet */ 1661 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1662 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1663 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1664 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1665 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1666 /* need to be adapted? */ 1667 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1668 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1669 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1670 /* print info */ 1671 if (pcbddc->dbg_flag) { 1672 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1673 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1674 } 1675 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1676 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1677 } 1678 } 1679 if (neumann) { /* Neumann */ 1680 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1681 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1682 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1683 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1684 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1685 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1686 /* need to be adapted? */ 1687 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1688 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1689 /* print info */ 1690 if (pcbddc->dbg_flag) { 1691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1692 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1693 } 1694 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1695 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1696 } 1697 } 1698 } 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1704 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1705 { 1706 PetscErrorCode ierr; 1707 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1708 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1709 1710 PetscFunctionBegin; 1711 if (!sub_schurs->reuse_mumps) { 1712 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1713 } 1714 if (!pcbddc->switch_static) { 1715 if (applytranspose && pcbddc->local_auxmat1) { 1716 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1717 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1718 } 1719 if (!sub_schurs->reuse_mumps) { 1720 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1721 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1722 } else { 1723 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1724 1725 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1726 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1727 } 1728 } else { 1729 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1730 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1731 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1732 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1733 if (applytranspose && pcbddc->local_auxmat1) { 1734 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1735 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1736 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1737 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1738 } 1739 } 1740 if (!sub_schurs->reuse_mumps) { 1741 if (applytranspose) { 1742 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1743 } else { 1744 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1745 } 1746 #if defined(PETSC_HAVE_MUMPS) 1747 } else { 1748 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1749 1750 if (applytranspose) { 1751 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1752 } else { 1753 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1754 } 1755 #endif 1756 } 1757 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1758 if (!pcbddc->switch_static) { 1759 if (!sub_schurs->reuse_mumps) { 1760 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1761 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1762 } else { 1763 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1764 1765 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1766 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1767 } 1768 if (!applytranspose && pcbddc->local_auxmat1) { 1769 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1770 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1771 } 1772 } else { 1773 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1774 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1775 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1776 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1777 if (!applytranspose && pcbddc->local_auxmat1) { 1778 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1779 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1780 } 1781 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1782 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1783 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1784 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1785 } 1786 PetscFunctionReturn(0); 1787 } 1788 1789 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1792 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1793 { 1794 PetscErrorCode ierr; 1795 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1796 PC_IS* pcis = (PC_IS*) (pc->data); 1797 const PetscScalar zero = 0.0; 1798 1799 PetscFunctionBegin; 1800 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1801 if (applytranspose) { 1802 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1803 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1804 } else { 1805 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1806 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1807 } 1808 /* start communications from local primal nodes to rhs of coarse solver */ 1809 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1810 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1811 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1812 1813 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1814 /* TODO remove null space when doing multilevel */ 1815 if (pcbddc->coarse_ksp) { 1816 Vec rhs,sol; 1817 1818 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1819 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1820 if (applytranspose) { 1821 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1822 } else { 1823 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1824 } 1825 } 1826 1827 /* Local solution on R nodes */ 1828 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1829 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1830 } 1831 1832 /* communications from coarse sol to local primal nodes */ 1833 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1834 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1835 1836 /* Sum contributions from two levels */ 1837 if (applytranspose) { 1838 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1839 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1840 } else { 1841 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1842 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1843 } 1844 PetscFunctionReturn(0); 1845 } 1846 1847 #undef __FUNCT__ 1848 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1849 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1850 { 1851 PetscErrorCode ierr; 1852 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1853 PetscScalar *array; 1854 Vec from,to; 1855 1856 PetscFunctionBegin; 1857 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1858 from = pcbddc->coarse_vec; 1859 to = pcbddc->vec1_P; 1860 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1861 Vec tvec; 1862 1863 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1864 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1865 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1866 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1867 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1868 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1869 } 1870 } else { /* from local to global -> put data in coarse right hand side */ 1871 from = pcbddc->vec1_P; 1872 to = pcbddc->coarse_vec; 1873 } 1874 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1880 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1881 { 1882 PetscErrorCode ierr; 1883 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1884 PetscScalar *array; 1885 Vec from,to; 1886 1887 PetscFunctionBegin; 1888 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1889 from = pcbddc->coarse_vec; 1890 to = pcbddc->vec1_P; 1891 } else { /* from local to global -> put data in coarse right hand side */ 1892 from = pcbddc->vec1_P; 1893 to = pcbddc->coarse_vec; 1894 } 1895 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1896 if (smode == SCATTER_FORWARD) { 1897 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1898 Vec tvec; 1899 1900 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1901 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1902 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1903 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1904 } 1905 } else { 1906 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1907 ierr = VecResetArray(from);CHKERRQ(ierr); 1908 } 1909 } 1910 PetscFunctionReturn(0); 1911 } 1912 1913 /* uncomment for testing purposes */ 1914 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1917 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1918 { 1919 PetscErrorCode ierr; 1920 PC_IS* pcis = (PC_IS*)(pc->data); 1921 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1922 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1923 /* one and zero */ 1924 PetscScalar one=1.0,zero=0.0; 1925 /* space to store constraints and their local indices */ 1926 PetscScalar *constraints_data; 1927 PetscInt *constraints_idxs,*constraints_idxs_B; 1928 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1929 PetscInt *constraints_n; 1930 /* iterators */ 1931 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1932 /* BLAS integers */ 1933 PetscBLASInt lwork,lierr; 1934 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1935 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1936 /* reuse */ 1937 PetscInt olocal_primal_size,olocal_primal_size_cc; 1938 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1939 /* change of basis */ 1940 PetscBool qr_needed; 1941 PetscBT change_basis,qr_needed_idx; 1942 /* auxiliary stuff */ 1943 PetscInt *nnz,*is_indices; 1944 PetscInt ncc; 1945 /* some quantities */ 1946 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1947 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1948 1949 PetscFunctionBegin; 1950 /* Destroy Mat objects computed previously */ 1951 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1952 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1953 /* save info on constraints from previous setup (if any) */ 1954 olocal_primal_size = pcbddc->local_primal_size; 1955 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1956 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1957 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1958 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1959 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1960 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1961 1962 /* print some info */ 1963 if (pcbddc->dbg_flag) { 1964 IS vertices; 1965 PetscInt nv,nedges,nfaces; 1966 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1967 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1968 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1969 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1971 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1972 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1973 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1974 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1975 } 1976 1977 if (!pcbddc->adaptive_selection) { 1978 IS ISForVertices,*ISForFaces,*ISForEdges; 1979 MatNullSpace nearnullsp; 1980 const Vec *nearnullvecs; 1981 Vec *localnearnullsp; 1982 PetscScalar *array; 1983 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1984 PetscBool nnsp_has_cnst; 1985 /* LAPACK working arrays for SVD or POD */ 1986 PetscBool skip_lapack,boolforchange; 1987 PetscScalar *work; 1988 PetscReal *singular_vals; 1989 #if defined(PETSC_USE_COMPLEX) 1990 PetscReal *rwork; 1991 #endif 1992 #if defined(PETSC_MISSING_LAPACK_GESVD) 1993 PetscScalar *temp_basis,*correlation_mat; 1994 #else 1995 PetscBLASInt dummy_int=1; 1996 PetscScalar dummy_scalar=1.; 1997 #endif 1998 1999 /* Get index sets for faces, edges and vertices from graph */ 2000 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2001 /* free unneeded index sets */ 2002 if (!pcbddc->use_vertices) { 2003 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2004 } 2005 if (!pcbddc->use_edges) { 2006 for (i=0;i<n_ISForEdges;i++) { 2007 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2008 } 2009 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2010 n_ISForEdges = 0; 2011 } 2012 if (!pcbddc->use_faces) { 2013 for (i=0;i<n_ISForFaces;i++) { 2014 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2015 } 2016 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2017 n_ISForFaces = 0; 2018 } 2019 2020 #if defined(PETSC_USE_DEBUG) 2021 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2022 Also use_change_of_basis should be consistent among processors */ 2023 if (pcbddc->NullSpace) { 2024 PetscBool tbool[2],gbool[2]; 2025 2026 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2027 pcbddc->use_change_of_basis = PETSC_TRUE; 2028 if (!ISForEdges) { 2029 pcbddc->use_change_on_faces = PETSC_TRUE; 2030 } 2031 } 2032 tbool[0] = pcbddc->use_change_of_basis; 2033 tbool[1] = pcbddc->use_change_on_faces; 2034 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2035 pcbddc->use_change_of_basis = gbool[0]; 2036 pcbddc->use_change_on_faces = gbool[1]; 2037 } 2038 #endif 2039 2040 /* check if near null space is attached to global mat */ 2041 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2042 if (nearnullsp) { 2043 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2044 /* remove any stored info */ 2045 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2046 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2047 /* store information for BDDC solver reuse */ 2048 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2049 pcbddc->onearnullspace = nearnullsp; 2050 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2051 for (i=0;i<nnsp_size;i++) { 2052 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2053 } 2054 } else { /* if near null space is not provided BDDC uses constants by default */ 2055 nnsp_size = 0; 2056 nnsp_has_cnst = PETSC_TRUE; 2057 } 2058 /* get max number of constraints on a single cc */ 2059 max_constraints = nnsp_size; 2060 if (nnsp_has_cnst) max_constraints++; 2061 2062 /* 2063 Evaluate maximum storage size needed by the procedure 2064 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2065 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2066 There can be multiple constraints per connected component 2067 */ 2068 n_vertices = 0; 2069 if (ISForVertices) { 2070 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2071 } 2072 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2073 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2074 2075 total_counts = n_ISForFaces+n_ISForEdges; 2076 total_counts *= max_constraints; 2077 total_counts += n_vertices; 2078 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2079 2080 total_counts = 0; 2081 max_size_of_constraint = 0; 2082 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2083 IS used_is; 2084 if (i<n_ISForEdges) { 2085 used_is = ISForEdges[i]; 2086 } else { 2087 used_is = ISForFaces[i-n_ISForEdges]; 2088 } 2089 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2090 total_counts += j; 2091 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2092 } 2093 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2094 2095 /* get local part of global near null space vectors */ 2096 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2097 for (k=0;k<nnsp_size;k++) { 2098 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2099 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2100 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 } 2102 2103 /* whether or not to skip lapack calls */ 2104 skip_lapack = PETSC_TRUE; 2105 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2106 2107 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2108 if (!skip_lapack) { 2109 PetscScalar temp_work; 2110 2111 #if defined(PETSC_MISSING_LAPACK_GESVD) 2112 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2113 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2114 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2115 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2116 #if defined(PETSC_USE_COMPLEX) 2117 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2118 #endif 2119 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2120 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2121 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2122 lwork = -1; 2123 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2124 #if !defined(PETSC_USE_COMPLEX) 2125 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2126 #else 2127 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2128 #endif 2129 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2130 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2131 #else /* on missing GESVD */ 2132 /* SVD */ 2133 PetscInt max_n,min_n; 2134 max_n = max_size_of_constraint; 2135 min_n = max_constraints; 2136 if (max_size_of_constraint < max_constraints) { 2137 min_n = max_size_of_constraint; 2138 max_n = max_constraints; 2139 } 2140 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2141 #if defined(PETSC_USE_COMPLEX) 2142 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2143 #endif 2144 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2145 lwork = -1; 2146 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2147 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2148 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2149 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2150 #if !defined(PETSC_USE_COMPLEX) 2151 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2152 #else 2153 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2154 #endif 2155 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2156 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2157 #endif /* on missing GESVD */ 2158 /* Allocate optimal workspace */ 2159 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2160 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2161 } 2162 /* Now we can loop on constraining sets */ 2163 total_counts = 0; 2164 constraints_idxs_ptr[0] = 0; 2165 constraints_data_ptr[0] = 0; 2166 /* vertices */ 2167 if (n_vertices) { 2168 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2169 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2170 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2171 for (i=0;i<n_vertices;i++) { 2172 constraints_n[total_counts] = 1; 2173 constraints_data[total_counts] = 1.0; 2174 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2175 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2176 total_counts++; 2177 } 2178 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2179 PetscBool used_vertex; 2180 for (i=0;i<n_vertices;i++) { 2181 used_vertex = PETSC_FALSE; 2182 k = 0; 2183 while (!used_vertex && k<nnsp_size) { 2184 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2185 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2186 constraints_n[total_counts] = 1; 2187 constraints_idxs[total_counts] = is_indices[i]; 2188 constraints_data[total_counts] = 1.0; 2189 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2190 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2191 total_counts++; 2192 used_vertex = PETSC_TRUE; 2193 } 2194 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2195 k++; 2196 } 2197 } 2198 } 2199 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2200 n_vertices = total_counts; 2201 } 2202 2203 /* edges and faces */ 2204 total_counts_cc = total_counts; 2205 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2206 IS used_is; 2207 PetscBool idxs_copied = PETSC_FALSE; 2208 2209 if (ncc<n_ISForEdges) { 2210 used_is = ISForEdges[ncc]; 2211 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2212 } else { 2213 used_is = ISForFaces[ncc-n_ISForEdges]; 2214 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2215 } 2216 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2217 2218 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2219 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2220 /* change of basis should not be performed on local periodic nodes */ 2221 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2222 if (nnsp_has_cnst) { 2223 PetscScalar quad_value; 2224 2225 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2226 idxs_copied = PETSC_TRUE; 2227 2228 if (!pcbddc->use_nnsp_true) { 2229 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2230 } else { 2231 quad_value = 1.0; 2232 } 2233 for (j=0;j<size_of_constraint;j++) { 2234 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2235 } 2236 temp_constraints++; 2237 total_counts++; 2238 } 2239 for (k=0;k<nnsp_size;k++) { 2240 PetscReal real_value; 2241 PetscScalar *ptr_to_data; 2242 2243 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2244 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2245 for (j=0;j<size_of_constraint;j++) { 2246 ptr_to_data[j] = array[is_indices[j]]; 2247 } 2248 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2249 /* check if array is null on the connected component */ 2250 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2251 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2252 if (real_value > 0.0) { /* keep indices and values */ 2253 temp_constraints++; 2254 total_counts++; 2255 if (!idxs_copied) { 2256 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2257 idxs_copied = PETSC_TRUE; 2258 } 2259 } 2260 } 2261 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2262 valid_constraints = temp_constraints; 2263 if (!pcbddc->use_nnsp_true && temp_constraints) { 2264 if (temp_constraints == 1) { /* just normalize the constraint */ 2265 PetscScalar norm,*ptr_to_data; 2266 2267 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2268 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2269 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2270 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2271 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2272 } else { /* perform SVD */ 2273 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2274 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2275 2276 #if defined(PETSC_MISSING_LAPACK_GESVD) 2277 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2278 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2279 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2280 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2281 from that computed using LAPACKgesvd 2282 -> This is due to a different computation of eigenvectors in LAPACKheev 2283 -> The quality of the POD-computed basis will be the same */ 2284 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2285 /* Store upper triangular part of correlation matrix */ 2286 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2287 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2288 for (j=0;j<temp_constraints;j++) { 2289 for (k=0;k<j+1;k++) { 2290 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2291 } 2292 } 2293 /* compute eigenvalues and eigenvectors of correlation matrix */ 2294 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2295 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2296 #if !defined(PETSC_USE_COMPLEX) 2297 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2298 #else 2299 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2300 #endif 2301 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2302 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2303 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2304 j = 0; 2305 while (j < temp_constraints && singular_vals[j] < tol) j++; 2306 total_counts = total_counts-j; 2307 valid_constraints = temp_constraints-j; 2308 /* scale and copy POD basis into used quadrature memory */ 2309 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2310 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2311 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2312 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2313 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2314 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2315 if (j<temp_constraints) { 2316 PetscInt ii; 2317 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2318 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2319 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2320 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2321 for (k=0;k<temp_constraints-j;k++) { 2322 for (ii=0;ii<size_of_constraint;ii++) { 2323 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2324 } 2325 } 2326 } 2327 #else /* on missing GESVD */ 2328 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2329 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2330 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2331 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2332 #if !defined(PETSC_USE_COMPLEX) 2333 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2334 #else 2335 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2336 #endif 2337 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2338 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2339 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2340 k = temp_constraints; 2341 if (k > size_of_constraint) k = size_of_constraint; 2342 j = 0; 2343 while (j < k && singular_vals[k-j-1] < tol) j++; 2344 valid_constraints = k-j; 2345 total_counts = total_counts-temp_constraints+valid_constraints; 2346 #endif /* on missing GESVD */ 2347 } 2348 } 2349 /* update pointers information */ 2350 if (valid_constraints) { 2351 constraints_n[total_counts_cc] = valid_constraints; 2352 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2353 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2354 /* set change_of_basis flag */ 2355 if (boolforchange) { 2356 PetscBTSet(change_basis,total_counts_cc); 2357 } 2358 total_counts_cc++; 2359 } 2360 } 2361 /* free workspace */ 2362 if (!skip_lapack) { 2363 ierr = PetscFree(work);CHKERRQ(ierr); 2364 #if defined(PETSC_USE_COMPLEX) 2365 ierr = PetscFree(rwork);CHKERRQ(ierr); 2366 #endif 2367 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2368 #if defined(PETSC_MISSING_LAPACK_GESVD) 2369 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2370 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2371 #endif 2372 } 2373 for (k=0;k<nnsp_size;k++) { 2374 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2375 } 2376 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2377 /* free index sets of faces, edges and vertices */ 2378 for (i=0;i<n_ISForFaces;i++) { 2379 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2380 } 2381 if (n_ISForFaces) { 2382 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2383 } 2384 for (i=0;i<n_ISForEdges;i++) { 2385 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2386 } 2387 if (n_ISForEdges) { 2388 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2389 } 2390 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2391 } else { 2392 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2393 2394 total_counts = 0; 2395 n_vertices = 0; 2396 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2397 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2398 } 2399 max_constraints = 0; 2400 total_counts_cc = 0; 2401 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2402 total_counts += pcbddc->adaptive_constraints_n[i]; 2403 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2404 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2405 } 2406 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2407 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2408 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2409 constraints_data = pcbddc->adaptive_constraints_data; 2410 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2411 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2412 total_counts_cc = 0; 2413 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2414 if (pcbddc->adaptive_constraints_n[i]) { 2415 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2416 } 2417 } 2418 #if 0 2419 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2420 for (i=0;i<total_counts_cc;i++) { 2421 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2422 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2423 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2424 printf(" %d",constraints_idxs[j]); 2425 } 2426 printf("\n"); 2427 printf("number of cc: %d\n",constraints_n[i]); 2428 } 2429 for (i=0;i<n_vertices;i++) { 2430 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2431 } 2432 for (i=0;i<sub_schurs->n_subs;i++) { 2433 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2434 } 2435 #endif 2436 2437 max_size_of_constraint = 0; 2438 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2439 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2440 /* Change of basis */ 2441 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2442 if (pcbddc->use_change_of_basis) { 2443 for (i=0;i<sub_schurs->n_subs;i++) { 2444 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2445 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2446 } 2447 } 2448 } 2449 } 2450 pcbddc->local_primal_size = total_counts; 2451 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2452 2453 /* map constraints_idxs in boundary numbering */ 2454 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2455 if (i != constraints_idxs_ptr[total_counts_cc]) { 2456 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2457 } 2458 2459 /* Create constraint matrix */ 2460 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2461 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2462 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2463 2464 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2465 /* determine if a QR strategy is needed for change of basis */ 2466 qr_needed = PETSC_FALSE; 2467 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2468 total_primal_vertices=0; 2469 pcbddc->local_primal_size_cc = 0; 2470 for (i=0;i<total_counts_cc;i++) { 2471 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2472 if (size_of_constraint == 1) { 2473 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2474 pcbddc->local_primal_size_cc += 1; 2475 } else if (PetscBTLookup(change_basis,i)) { 2476 for (k=0;k<constraints_n[i];k++) { 2477 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2478 } 2479 pcbddc->local_primal_size_cc += constraints_n[i]; 2480 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2481 PetscBTSet(qr_needed_idx,i); 2482 qr_needed = PETSC_TRUE; 2483 } 2484 } else { 2485 pcbddc->local_primal_size_cc += 1; 2486 } 2487 } 2488 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2489 pcbddc->n_vertices = total_primal_vertices; 2490 /* permute indices in order to have a sorted set of vertices */ 2491 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2492 2493 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2494 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2495 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2496 2497 /* nonzero structure of constraint matrix */ 2498 /* and get reference dof for local constraints */ 2499 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2500 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2501 2502 j = total_primal_vertices; 2503 total_counts = total_primal_vertices; 2504 cum = total_primal_vertices; 2505 for (i=n_vertices;i<total_counts_cc;i++) { 2506 if (!PetscBTLookup(change_basis,i)) { 2507 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2508 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2509 cum++; 2510 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2511 for (k=0;k<constraints_n[i];k++) { 2512 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2513 nnz[j+k] = size_of_constraint; 2514 } 2515 j += constraints_n[i]; 2516 } 2517 } 2518 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2519 ierr = PetscFree(nnz);CHKERRQ(ierr); 2520 2521 /* set values in constraint matrix */ 2522 for (i=0;i<total_primal_vertices;i++) { 2523 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2524 } 2525 total_counts = total_primal_vertices; 2526 for (i=n_vertices;i<total_counts_cc;i++) { 2527 if (!PetscBTLookup(change_basis,i)) { 2528 PetscInt *cols; 2529 2530 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2531 cols = constraints_idxs+constraints_idxs_ptr[i]; 2532 for (k=0;k<constraints_n[i];k++) { 2533 PetscInt row = total_counts+k; 2534 PetscScalar *vals; 2535 2536 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2537 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2538 } 2539 total_counts += constraints_n[i]; 2540 } 2541 } 2542 /* assembling */ 2543 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2544 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2545 2546 /* 2547 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2548 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2549 */ 2550 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2551 if (pcbddc->use_change_of_basis) { 2552 /* dual and primal dofs on a single cc */ 2553 PetscInt dual_dofs,primal_dofs; 2554 /* working stuff for GEQRF */ 2555 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2556 PetscBLASInt lqr_work; 2557 /* working stuff for UNGQR */ 2558 PetscScalar *gqr_work,lgqr_work_t; 2559 PetscBLASInt lgqr_work; 2560 /* working stuff for TRTRS */ 2561 PetscScalar *trs_rhs; 2562 PetscBLASInt Blas_NRHS; 2563 /* pointers for values insertion into change of basis matrix */ 2564 PetscInt *start_rows,*start_cols; 2565 PetscScalar *start_vals; 2566 /* working stuff for values insertion */ 2567 PetscBT is_primal; 2568 PetscInt *aux_primal_numbering_B; 2569 /* matrix sizes */ 2570 PetscInt global_size,local_size; 2571 /* temporary change of basis */ 2572 Mat localChangeOfBasisMatrix; 2573 /* extra space for debugging */ 2574 PetscScalar *dbg_work; 2575 2576 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2577 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2578 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2579 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2580 /* nonzeros for local mat */ 2581 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2582 for (i=0;i<pcis->n;i++) nnz[i]=1; 2583 for (i=n_vertices;i<total_counts_cc;i++) { 2584 if (PetscBTLookup(change_basis,i)) { 2585 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2586 if (PetscBTLookup(qr_needed_idx,i)) { 2587 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2588 } else { 2589 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2590 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2591 } 2592 } 2593 } 2594 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2595 ierr = PetscFree(nnz);CHKERRQ(ierr); 2596 /* Set initial identity in the matrix */ 2597 for (i=0;i<pcis->n;i++) { 2598 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2599 } 2600 2601 if (pcbddc->dbg_flag) { 2602 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2604 } 2605 2606 2607 /* Now we loop on the constraints which need a change of basis */ 2608 /* 2609 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2610 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2611 2612 Basic blocks of change of basis matrix T computed by 2613 2614 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2615 2616 | 1 0 ... 0 s_1/S | 2617 | 0 1 ... 0 s_2/S | 2618 | ... | 2619 | 0 ... 1 s_{n-1}/S | 2620 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2621 2622 with S = \sum_{i=1}^n s_i^2 2623 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2624 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2625 2626 - QR decomposition of constraints otherwise 2627 */ 2628 if (qr_needed) { 2629 /* space to store Q */ 2630 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2631 /* first we issue queries for optimal work */ 2632 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2633 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2634 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2635 lqr_work = -1; 2636 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2637 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2638 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2639 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2640 lgqr_work = -1; 2641 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2642 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2643 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2644 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2645 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2646 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2647 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2648 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2649 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2650 /* array to store scaling factors for reflectors */ 2651 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2652 /* array to store rhs and solution of triangular solver */ 2653 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2654 /* allocating workspace for check */ 2655 if (pcbddc->dbg_flag) { 2656 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2657 } 2658 } 2659 /* array to store whether a node is primal or not */ 2660 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2661 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2662 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2663 if (i != total_primal_vertices) { 2664 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2665 } 2666 for (i=0;i<total_primal_vertices;i++) { 2667 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2668 } 2669 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2670 2671 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2672 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2673 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2674 if (PetscBTLookup(change_basis,total_counts)) { 2675 /* get constraint info */ 2676 primal_dofs = constraints_n[total_counts]; 2677 dual_dofs = size_of_constraint-primal_dofs; 2678 2679 if (pcbddc->dbg_flag) { 2680 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2681 } 2682 2683 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2684 2685 /* copy quadrature constraints for change of basis check */ 2686 if (pcbddc->dbg_flag) { 2687 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2688 } 2689 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2690 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2691 2692 /* compute QR decomposition of constraints */ 2693 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2694 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2695 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2696 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2697 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2698 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2699 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2700 2701 /* explictly compute R^-T */ 2702 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2703 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2704 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2705 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2706 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2707 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2708 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2709 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2710 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2711 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2712 2713 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2714 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2715 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2716 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2717 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2718 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2719 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2720 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2721 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2722 2723 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2724 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2725 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2726 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2727 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2728 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2729 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2730 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2731 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2732 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2733 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2734 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2735 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2736 2737 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2738 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2739 /* insert cols for primal dofs */ 2740 for (j=0;j<primal_dofs;j++) { 2741 start_vals = &qr_basis[j*size_of_constraint]; 2742 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2743 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2744 } 2745 /* insert cols for dual dofs */ 2746 for (j=0,k=0;j<dual_dofs;k++) { 2747 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2748 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2749 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2750 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2751 j++; 2752 } 2753 } 2754 2755 /* check change of basis */ 2756 if (pcbddc->dbg_flag) { 2757 PetscInt ii,jj; 2758 PetscBool valid_qr=PETSC_TRUE; 2759 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2760 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2761 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2762 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2763 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2764 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2765 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2766 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2767 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2768 for (jj=0;jj<size_of_constraint;jj++) { 2769 for (ii=0;ii<primal_dofs;ii++) { 2770 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2771 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2772 } 2773 } 2774 if (!valid_qr) { 2775 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2776 for (jj=0;jj<size_of_constraint;jj++) { 2777 for (ii=0;ii<primal_dofs;ii++) { 2778 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2779 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2780 } 2781 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2782 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2783 } 2784 } 2785 } 2786 } else { 2787 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2788 } 2789 } 2790 } else { /* simple transformation block */ 2791 PetscInt row,col; 2792 PetscScalar val,norm; 2793 2794 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2795 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2796 for (j=0;j<size_of_constraint;j++) { 2797 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2798 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2799 if (!PetscBTLookup(is_primal,row_B)) { 2800 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2801 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2802 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2803 } else { 2804 for (k=0;k<size_of_constraint;k++) { 2805 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2806 if (row != col) { 2807 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2808 } else { 2809 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2810 } 2811 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2812 } 2813 } 2814 } 2815 if (pcbddc->dbg_flag) { 2816 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2817 } 2818 } 2819 } else { 2820 if (pcbddc->dbg_flag) { 2821 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2822 } 2823 } 2824 } 2825 2826 /* free workspace */ 2827 if (qr_needed) { 2828 if (pcbddc->dbg_flag) { 2829 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2830 } 2831 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2832 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2833 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2834 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2835 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2836 } 2837 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2838 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2839 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2840 2841 /* assembling of global change of variable */ 2842 { 2843 Mat tmat; 2844 PetscInt bs; 2845 2846 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2847 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2848 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2849 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2850 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2851 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2852 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2853 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2854 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2855 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2856 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2857 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2858 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2859 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2860 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2861 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2862 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2863 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2864 } 2865 /* check */ 2866 if (pcbddc->dbg_flag) { 2867 PetscReal error; 2868 Vec x,x_change; 2869 2870 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2871 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2872 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2873 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2874 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2875 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2876 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2877 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2878 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2879 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2880 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2881 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2882 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2883 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2884 ierr = VecDestroy(&x);CHKERRQ(ierr); 2885 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2886 } 2887 2888 /* adapt sub_schurs computed (if any) */ 2889 if (pcbddc->use_deluxe_scaling) { 2890 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2891 if (sub_schurs->S_Ej_all) { 2892 Mat S_new,tmat; 2893 IS is_all_N; 2894 2895 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2896 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2897 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2898 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2899 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2900 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2901 sub_schurs->S_Ej_all = S_new; 2902 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2903 if (sub_schurs->sum_S_Ej_all) { 2904 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2905 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2906 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2907 sub_schurs->sum_S_Ej_all = S_new; 2908 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2909 } 2910 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2911 } 2912 } 2913 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2914 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2915 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2916 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2917 } 2918 2919 /* set up change of basis context */ 2920 if (pcbddc->ChangeOfBasisMatrix) { 2921 PCBDDCChange_ctx change_ctx; 2922 2923 if (!pcbddc->new_global_mat) { 2924 PetscInt global_size,local_size; 2925 2926 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2927 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2928 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2929 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2930 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2931 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2932 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2933 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2934 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2935 } else { 2936 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2937 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2938 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2939 } 2940 if (!pcbddc->user_ChangeOfBasisMatrix) { 2941 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2942 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2943 } else { 2944 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2945 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2946 } 2947 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2948 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2949 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2950 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2951 } 2952 2953 /* check if a new primal space has been introduced */ 2954 pcbddc->new_primal_space_local = PETSC_TRUE; 2955 if (olocal_primal_size == pcbddc->local_primal_size) { 2956 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2957 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2958 if (!pcbddc->new_primal_space_local) { 2959 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2960 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2961 } 2962 } 2963 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2964 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2965 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2966 2967 /* flush dbg viewer */ 2968 if (pcbddc->dbg_flag) { 2969 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2970 } 2971 2972 /* free workspace */ 2973 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2974 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2975 if (!pcbddc->adaptive_selection) { 2976 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2977 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2978 } else { 2979 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2980 pcbddc->adaptive_constraints_idxs_ptr, 2981 pcbddc->adaptive_constraints_data_ptr, 2982 pcbddc->adaptive_constraints_idxs, 2983 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2984 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2985 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2986 } 2987 PetscFunctionReturn(0); 2988 } 2989 2990 #undef __FUNCT__ 2991 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2992 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2993 { 2994 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2995 PC_IS *pcis = (PC_IS*)pc->data; 2996 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2997 PetscInt ierr,i,vertex_size,N; 2998 PetscViewer viewer=pcbddc->dbg_viewer; 2999 3000 PetscFunctionBegin; 3001 /* Reset previously computed graph */ 3002 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3003 /* Init local Graph struct */ 3004 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3005 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 3006 3007 /* Check validity of the csr graph passed in by the user */ 3008 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3009 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3010 } 3011 3012 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3013 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3014 PetscInt *xadj,*adjncy; 3015 PetscInt nvtxs; 3016 PetscBool flg_row=PETSC_FALSE; 3017 3018 if (pcbddc->use_local_adj) { 3019 3020 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3021 if (flg_row) { 3022 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3023 pcbddc->computed_rowadj = PETSC_TRUE; 3024 } 3025 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3026 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3027 IS is_dummy; 3028 ISLocalToGlobalMapping l2gmap_dummy; 3029 PetscInt j,sum; 3030 PetscInt *cxadj,*cadjncy; 3031 const PetscInt *idxs; 3032 PCBDDCGraph graph; 3033 PetscBT is_on_boundary; 3034 3035 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3036 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3037 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3038 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3039 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3040 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3041 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3042 if (flg_row) { 3043 graph->xadj = xadj; 3044 graph->adjncy = adjncy; 3045 } 3046 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3047 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3048 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3049 3050 if (pcbddc->dbg_flag) { 3051 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3052 for (i=0;i<graph->ncc;i++) { 3053 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3054 } 3055 } 3056 3057 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3058 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3059 for (i=0;i<pcis->n_B;i++) { 3060 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3061 } 3062 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3063 3064 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3065 sum = 0; 3066 for (i=0;i<graph->ncc;i++) { 3067 PetscInt sizecc = 0; 3068 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3069 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3070 sizecc++; 3071 } 3072 } 3073 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3074 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3075 cxadj[graph->queue[j]] = sizecc; 3076 } 3077 } 3078 sum += sizecc*sizecc; 3079 } 3080 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3081 sum = 0; 3082 for (i=0;i<pcis->n;i++) { 3083 PetscInt temp = cxadj[i]; 3084 cxadj[i] = sum; 3085 sum += temp; 3086 } 3087 cxadj[pcis->n] = sum; 3088 for (i=0;i<graph->ncc;i++) { 3089 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3090 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3091 PetscInt k,sizecc = 0; 3092 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3093 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3094 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3095 sizecc++; 3096 } 3097 } 3098 } 3099 } 3100 } 3101 if (sum) { 3102 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3103 } else { 3104 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3105 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3106 } 3107 graph->xadj = 0; 3108 graph->adjncy = 0; 3109 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3110 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3111 } 3112 } 3113 if (pcbddc->dbg_flag) { 3114 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3115 } 3116 3117 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3118 vertex_size = 1; 3119 if (pcbddc->user_provided_isfordofs) { 3120 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3121 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3122 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3123 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3124 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3125 } 3126 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3127 pcbddc->n_ISForDofs = 0; 3128 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3129 } 3130 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3131 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3132 } else { 3133 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3134 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3135 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3136 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3137 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3138 } 3139 } 3140 } 3141 3142 /* Setup of Graph */ 3143 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3144 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3145 } 3146 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3147 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3148 } 3149 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3150 3151 /* Graph's connected components analysis */ 3152 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3153 3154 /* print some info to stdout */ 3155 if (pcbddc->dbg_flag) { 3156 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3157 } 3158 3159 /* mark topography has done */ 3160 pcbddc->recompute_topography = PETSC_FALSE; 3161 PetscFunctionReturn(0); 3162 } 3163 3164 #undef __FUNCT__ 3165 #define __FUNCT__ "PCBDDCSubsetNumbering" 3166 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3167 { 3168 Vec local_vec,global_vec; 3169 IS seqis,paris; 3170 VecScatter scatter_ctx; 3171 PetscScalar *array; 3172 PetscInt *temp_global_dofs; 3173 PetscScalar globalsum; 3174 PetscInt i,j,s; 3175 PetscInt nlocals,first_index,old_index,max_local,max_global; 3176 PetscMPIInt rank_prec_comm,size_prec_comm; 3177 PetscInt *dof_sizes,*dof_displs; 3178 PetscBool first_found; 3179 PetscErrorCode ierr; 3180 3181 PetscFunctionBegin; 3182 /* mpi buffers */ 3183 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3184 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3185 j = ( !rank_prec_comm ? size_prec_comm : 0); 3186 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3187 /* get maximum size of subset */ 3188 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3189 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3190 max_local = 0; 3191 for (i=0;i<n_local_dofs;i++) { 3192 if (max_local < temp_global_dofs[i] ) { 3193 max_local = temp_global_dofs[i]; 3194 } 3195 } 3196 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3197 max_global++; 3198 max_local = 0; 3199 for (i=0;i<n_local_dofs;i++) { 3200 if (max_local < local_dofs[i] ) { 3201 max_local = local_dofs[i]; 3202 } 3203 } 3204 max_local++; 3205 /* allocate workspace */ 3206 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3207 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3208 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3209 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3210 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3211 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3212 /* create scatter */ 3213 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3214 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3215 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3216 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3217 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3218 /* init array */ 3219 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3220 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3221 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3222 if (local_dofs_mult) { 3223 for (i=0;i<n_local_dofs;i++) { 3224 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3225 } 3226 } else { 3227 for (i=0;i<n_local_dofs;i++) { 3228 array[local_dofs[i]]=1.0; 3229 } 3230 } 3231 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3232 /* scatter into global vec and get total number of global dofs */ 3233 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3234 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3235 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3236 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3237 /* Fill global_vec with cumulative function for global numbering */ 3238 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3239 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3240 nlocals = 0; 3241 first_index = -1; 3242 first_found = PETSC_FALSE; 3243 for (i=0;i<s;i++) { 3244 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3245 first_found = PETSC_TRUE; 3246 first_index = i; 3247 } 3248 nlocals += (PetscInt)PetscRealPart(array[i]); 3249 } 3250 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3251 if (!rank_prec_comm) { 3252 dof_displs[0]=0; 3253 for (i=1;i<size_prec_comm;i++) { 3254 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3255 } 3256 } 3257 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3258 if (first_found) { 3259 array[first_index] += (PetscScalar)nlocals; 3260 old_index = first_index; 3261 for (i=first_index+1;i<s;i++) { 3262 if (PetscRealPart(array[i]) > 0.1) { 3263 array[i] += array[old_index]; 3264 old_index = i; 3265 } 3266 } 3267 } 3268 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3269 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3270 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3271 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3272 /* get global ordering of local dofs */ 3273 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3274 if (local_dofs_mult) { 3275 for (i=0;i<n_local_dofs;i++) { 3276 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3277 } 3278 } else { 3279 for (i=0;i<n_local_dofs;i++) { 3280 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3281 } 3282 } 3283 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3284 /* free workspace */ 3285 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3286 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3287 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3288 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3289 /* return pointer to global ordering of local dofs */ 3290 *global_numbering_subset = temp_global_dofs; 3291 PetscFunctionReturn(0); 3292 } 3293 3294 #undef __FUNCT__ 3295 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3296 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3297 { 3298 PetscInt i,j; 3299 PetscScalar *alphas; 3300 PetscErrorCode ierr; 3301 3302 PetscFunctionBegin; 3303 /* this implements stabilized Gram-Schmidt */ 3304 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3305 for (i=0;i<n;i++) { 3306 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3307 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3308 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3309 } 3310 ierr = PetscFree(alphas);CHKERRQ(ierr); 3311 PetscFunctionReturn(0); 3312 } 3313 3314 #undef __FUNCT__ 3315 #define __FUNCT__ "MatISGetSubassemblingPattern" 3316 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3317 { 3318 Mat subdomain_adj; 3319 IS new_ranks,ranks_send_to; 3320 MatPartitioning partitioner; 3321 Mat_IS *matis; 3322 PetscInt n_neighs,*neighs,*n_shared,**shared; 3323 PetscInt prank; 3324 PetscMPIInt size,rank,color; 3325 PetscInt *xadj,*adjncy,*oldranks; 3326 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3327 PetscInt i,local_size,threshold=0; 3328 PetscErrorCode ierr; 3329 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3330 PetscSubcomm subcomm; 3331 3332 PetscFunctionBegin; 3333 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3334 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3335 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3336 3337 /* Get info on mapping */ 3338 matis = (Mat_IS*)(mat->data); 3339 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3340 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3341 3342 /* build local CSR graph of subdomains' connectivity */ 3343 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3344 xadj[0] = 0; 3345 xadj[1] = PetscMax(n_neighs-1,0); 3346 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3347 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3348 3349 if (threshold) { 3350 PetscInt xadj_count = 0; 3351 for (i=1;i<n_neighs;i++) { 3352 if (n_shared[i] > threshold) { 3353 adjncy[xadj_count] = neighs[i]; 3354 adjncy_wgt[xadj_count] = n_shared[i]; 3355 xadj_count++; 3356 } 3357 } 3358 xadj[1] = xadj_count; 3359 } else { 3360 if (xadj[1]) { 3361 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3362 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3363 } 3364 } 3365 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3366 if (use_square) { 3367 for (i=0;i<xadj[1];i++) { 3368 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3369 } 3370 } 3371 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3372 3373 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3374 3375 /* 3376 Restrict work on active processes only. 3377 */ 3378 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3379 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3380 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3381 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3382 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3383 if (color) { 3384 ierr = PetscFree(xadj);CHKERRQ(ierr); 3385 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3386 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3387 } else { 3388 PetscInt coarsening_ratio; 3389 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3390 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3391 prank = rank; 3392 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3393 /* 3394 for (i=0;i<size;i++) { 3395 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3396 } 3397 */ 3398 for (i=0;i<xadj[1];i++) { 3399 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3400 } 3401 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3402 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3403 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3404 3405 /* Partition */ 3406 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3407 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3408 if (use_vwgt) { 3409 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3410 v_wgt[0] = local_size; 3411 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3412 } 3413 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3414 coarsening_ratio = size/n_subdomains; 3415 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3416 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3417 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3418 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3419 3420 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3421 if (contiguous) { 3422 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3423 } else { 3424 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3425 } 3426 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3427 /* clean up */ 3428 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3429 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3430 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3431 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3432 } 3433 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3434 3435 /* assemble parallel IS for sends */ 3436 i = 1; 3437 if (color) i=0; 3438 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3439 3440 /* get back IS */ 3441 *is_sends = ranks_send_to; 3442 PetscFunctionReturn(0); 3443 } 3444 3445 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3446 3447 #undef __FUNCT__ 3448 #define __FUNCT__ "MatISSubassemble" 3449 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3450 { 3451 Mat local_mat; 3452 Mat_IS *matis; 3453 IS is_sends_internal; 3454 PetscInt rows,cols,new_local_rows; 3455 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3456 PetscBool ismatis,isdense,newisdense,destroy_mat; 3457 ISLocalToGlobalMapping l2gmap; 3458 PetscInt* l2gmap_indices; 3459 const PetscInt* is_indices; 3460 MatType new_local_type; 3461 /* buffers */ 3462 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3463 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3464 PetscInt *recv_buffer_idxs_local; 3465 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3466 /* MPI */ 3467 MPI_Comm comm,comm_n; 3468 PetscSubcomm subcomm; 3469 PetscMPIInt n_sends,n_recvs,commsize; 3470 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3471 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3472 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3473 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3474 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3475 PetscErrorCode ierr; 3476 3477 PetscFunctionBegin; 3478 /* TODO: add missing checks */ 3479 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3480 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3481 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3482 PetscValidLogicalCollectiveInt(mat,nis,7); 3483 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3484 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3485 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3486 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3487 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3488 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3489 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3490 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3491 PetscInt mrows,mcols,mnrows,mncols; 3492 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3493 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3494 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3495 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3496 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3497 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3498 } 3499 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3500 PetscValidLogicalCollectiveInt(mat,bs,0); 3501 /* prepare IS for sending if not provided */ 3502 if (!is_sends) { 3503 PetscBool pcontig = PETSC_TRUE; 3504 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3505 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3506 } else { 3507 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3508 is_sends_internal = is_sends; 3509 } 3510 3511 /* get pointer of MATIS data */ 3512 matis = (Mat_IS*)mat->data; 3513 3514 /* get comm */ 3515 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3516 3517 /* compute number of sends */ 3518 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3519 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3520 3521 /* compute number of receives */ 3522 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3523 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3524 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3525 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3526 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3527 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3528 ierr = PetscFree(iflags);CHKERRQ(ierr); 3529 3530 /* restrict comm if requested */ 3531 subcomm = 0; 3532 destroy_mat = PETSC_FALSE; 3533 if (restrict_comm) { 3534 PetscMPIInt color,subcommsize; 3535 3536 color = 0; 3537 if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */ 3538 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3539 subcommsize = commsize - subcommsize; 3540 /* check if reuse has been requested */ 3541 if (reuse == MAT_REUSE_MATRIX) { 3542 if (*mat_n) { 3543 PetscMPIInt subcommsize2; 3544 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3545 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3546 comm_n = PetscObjectComm((PetscObject)*mat_n); 3547 } else { 3548 comm_n = PETSC_COMM_SELF; 3549 } 3550 } else { /* MAT_INITIAL_MATRIX */ 3551 PetscMPIInt rank; 3552 3553 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3554 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3555 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3556 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3557 comm_n = PetscSubcommChild(subcomm); 3558 } 3559 /* flag to destroy *mat_n if not significative */ 3560 if (color) destroy_mat = PETSC_TRUE; 3561 } else { 3562 comm_n = comm; 3563 } 3564 3565 /* prepare send/receive buffers */ 3566 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3567 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3568 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3569 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3570 if (nis) { 3571 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3572 } 3573 3574 /* Get data from local matrices */ 3575 if (!isdense) { 3576 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3577 /* TODO: See below some guidelines on how to prepare the local buffers */ 3578 /* 3579 send_buffer_vals should contain the raw values of the local matrix 3580 send_buffer_idxs should contain: 3581 - MatType_PRIVATE type 3582 - PetscInt size_of_l2gmap 3583 - PetscInt global_row_indices[size_of_l2gmap] 3584 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3585 */ 3586 } else { 3587 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3588 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3589 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3590 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3591 send_buffer_idxs[1] = i; 3592 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3593 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3594 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3595 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3596 for (i=0;i<n_sends;i++) { 3597 ilengths_vals[is_indices[i]] = len*len; 3598 ilengths_idxs[is_indices[i]] = len+2; 3599 } 3600 } 3601 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3602 /* additional is (if any) */ 3603 if (nis) { 3604 PetscMPIInt psum; 3605 PetscInt j; 3606 for (j=0,psum=0;j<nis;j++) { 3607 PetscInt plen; 3608 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3609 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3610 psum += len+1; /* indices + lenght */ 3611 } 3612 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3613 for (j=0,psum=0;j<nis;j++) { 3614 PetscInt plen; 3615 const PetscInt *is_array_idxs; 3616 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3617 send_buffer_idxs_is[psum] = plen; 3618 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3619 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3620 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3621 psum += plen+1; /* indices + lenght */ 3622 } 3623 for (i=0;i<n_sends;i++) { 3624 ilengths_idxs_is[is_indices[i]] = psum; 3625 } 3626 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3627 } 3628 3629 buf_size_idxs = 0; 3630 buf_size_vals = 0; 3631 buf_size_idxs_is = 0; 3632 for (i=0;i<n_recvs;i++) { 3633 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3634 buf_size_vals += (PetscInt)olengths_vals[i]; 3635 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3636 } 3637 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3638 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3639 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3640 3641 /* get new tags for clean communications */ 3642 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3643 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3644 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3645 3646 /* allocate for requests */ 3647 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3648 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3649 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3650 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3651 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3652 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3653 3654 /* communications */ 3655 ptr_idxs = recv_buffer_idxs; 3656 ptr_vals = recv_buffer_vals; 3657 ptr_idxs_is = recv_buffer_idxs_is; 3658 for (i=0;i<n_recvs;i++) { 3659 source_dest = onodes[i]; 3660 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3661 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3662 ptr_idxs += olengths_idxs[i]; 3663 ptr_vals += olengths_vals[i]; 3664 if (nis) { 3665 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3666 ptr_idxs_is += olengths_idxs_is[i]; 3667 } 3668 } 3669 for (i=0;i<n_sends;i++) { 3670 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3671 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3672 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3673 if (nis) { 3674 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3675 } 3676 } 3677 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3678 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3679 3680 /* assemble new l2g map */ 3681 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3682 ptr_idxs = recv_buffer_idxs; 3683 new_local_rows = 0; 3684 for (i=0;i<n_recvs;i++) { 3685 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3686 ptr_idxs += olengths_idxs[i]; 3687 } 3688 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3689 ptr_idxs = recv_buffer_idxs; 3690 new_local_rows = 0; 3691 for (i=0;i<n_recvs;i++) { 3692 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3693 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3694 ptr_idxs += olengths_idxs[i]; 3695 } 3696 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3697 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3698 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3699 3700 /* infer new local matrix type from received local matrices type */ 3701 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3702 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3703 if (n_recvs) { 3704 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3705 ptr_idxs = recv_buffer_idxs; 3706 for (i=0;i<n_recvs;i++) { 3707 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3708 new_local_type_private = MATAIJ_PRIVATE; 3709 break; 3710 } 3711 ptr_idxs += olengths_idxs[i]; 3712 } 3713 switch (new_local_type_private) { 3714 case MATDENSE_PRIVATE: 3715 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3716 new_local_type = MATSEQAIJ; 3717 bs = 1; 3718 } else { /* if I receive only 1 dense matrix */ 3719 new_local_type = MATSEQDENSE; 3720 bs = 1; 3721 } 3722 break; 3723 case MATAIJ_PRIVATE: 3724 new_local_type = MATSEQAIJ; 3725 bs = 1; 3726 break; 3727 case MATBAIJ_PRIVATE: 3728 new_local_type = MATSEQBAIJ; 3729 break; 3730 case MATSBAIJ_PRIVATE: 3731 new_local_type = MATSEQSBAIJ; 3732 break; 3733 default: 3734 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3735 break; 3736 } 3737 } else { /* by default, new_local_type is seqdense */ 3738 new_local_type = MATSEQDENSE; 3739 bs = 1; 3740 } 3741 3742 /* create MATIS object if needed */ 3743 if (reuse == MAT_INITIAL_MATRIX) { 3744 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3745 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3746 } else { 3747 /* it also destroys the local matrices */ 3748 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3749 } 3750 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3751 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3752 3753 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3754 3755 /* Global to local map of received indices */ 3756 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3757 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3758 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3759 3760 /* restore attributes -> type of incoming data and its size */ 3761 buf_size_idxs = 0; 3762 for (i=0;i<n_recvs;i++) { 3763 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3764 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3765 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3766 } 3767 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3768 3769 /* set preallocation */ 3770 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3771 if (!newisdense) { 3772 PetscInt *new_local_nnz=0; 3773 3774 ptr_vals = recv_buffer_vals; 3775 ptr_idxs = recv_buffer_idxs_local; 3776 if (n_recvs) { 3777 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3778 } 3779 for (i=0;i<n_recvs;i++) { 3780 PetscInt j; 3781 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3782 for (j=0;j<*(ptr_idxs+1);j++) { 3783 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3784 } 3785 } else { 3786 /* TODO */ 3787 } 3788 ptr_idxs += olengths_idxs[i]; 3789 } 3790 if (new_local_nnz) { 3791 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3792 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3793 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3794 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3795 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3796 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3797 } else { 3798 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3799 } 3800 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3801 } else { 3802 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3803 } 3804 3805 /* set values */ 3806 ptr_vals = recv_buffer_vals; 3807 ptr_idxs = recv_buffer_idxs_local; 3808 for (i=0;i<n_recvs;i++) { 3809 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3810 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3811 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3812 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3813 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3814 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3815 } else { 3816 /* TODO */ 3817 } 3818 ptr_idxs += olengths_idxs[i]; 3819 ptr_vals += olengths_vals[i]; 3820 } 3821 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3822 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3823 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3824 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3825 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3826 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3827 3828 #if 0 3829 if (!restrict_comm) { /* check */ 3830 Vec lvec,rvec; 3831 PetscReal infty_error; 3832 3833 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3834 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3835 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3836 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3837 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3838 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3839 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3840 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3841 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3842 } 3843 #endif 3844 3845 /* assemble new additional is (if any) */ 3846 if (nis) { 3847 PetscInt **temp_idxs,*count_is,j,psum; 3848 3849 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3850 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3851 ptr_idxs = recv_buffer_idxs_is; 3852 psum = 0; 3853 for (i=0;i<n_recvs;i++) { 3854 for (j=0;j<nis;j++) { 3855 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3856 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3857 psum += plen; 3858 ptr_idxs += plen+1; /* shift pointer to received data */ 3859 } 3860 } 3861 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3862 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3863 for (i=1;i<nis;i++) { 3864 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3865 } 3866 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3867 ptr_idxs = recv_buffer_idxs_is; 3868 for (i=0;i<n_recvs;i++) { 3869 for (j=0;j<nis;j++) { 3870 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3871 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3872 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3873 ptr_idxs += plen+1; /* shift pointer to received data */ 3874 } 3875 } 3876 for (i=0;i<nis;i++) { 3877 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3878 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3879 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3880 } 3881 ierr = PetscFree(count_is);CHKERRQ(ierr); 3882 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3883 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3884 } 3885 /* free workspace */ 3886 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3887 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3888 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3889 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3890 if (isdense) { 3891 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3892 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3893 } else { 3894 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3895 } 3896 if (nis) { 3897 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3898 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3899 } 3900 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3901 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3902 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3903 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3904 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3905 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3906 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3907 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3908 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3909 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3910 ierr = PetscFree(onodes);CHKERRQ(ierr); 3911 if (nis) { 3912 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3913 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3914 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3915 } 3916 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3917 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3918 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3919 for (i=0;i<nis;i++) { 3920 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3921 } 3922 } 3923 PetscFunctionReturn(0); 3924 } 3925 3926 /* temporary hack into ksp private data structure */ 3927 #include <petsc/private/kspimpl.h> 3928 3929 #undef __FUNCT__ 3930 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3931 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3932 { 3933 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3934 PC_IS *pcis = (PC_IS*)pc->data; 3935 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3936 MatNullSpace CoarseNullSpace=NULL; 3937 ISLocalToGlobalMapping coarse_islg; 3938 IS coarse_is,*isarray; 3939 PetscInt i,im_active=-1,active_procs=-1; 3940 PetscInt nis,nisdofs,nisneu; 3941 PC pc_temp; 3942 PCType coarse_pc_type; 3943 KSPType coarse_ksp_type; 3944 PetscBool multilevel_requested,multilevel_allowed; 3945 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3946 Mat t_coarse_mat_is; 3947 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3948 PetscMPIInt all_procs; 3949 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3950 PetscBool compute_vecs = PETSC_FALSE; 3951 PetscScalar *array; 3952 PetscErrorCode ierr; 3953 3954 PetscFunctionBegin; 3955 /* Assign global numbering to coarse dofs */ 3956 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3957 PetscInt ocoarse_size; 3958 compute_vecs = PETSC_TRUE; 3959 ocoarse_size = pcbddc->coarse_size; 3960 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3961 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3962 /* see if we can avoid some work */ 3963 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3964 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3965 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3966 coarse_reuse = PETSC_FALSE; 3967 } else { /* we can safely reuse already computed coarse matrix */ 3968 coarse_reuse = PETSC_TRUE; 3969 } 3970 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3971 coarse_reuse = PETSC_FALSE; 3972 } 3973 /* reset any subassembling information */ 3974 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3975 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3976 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3977 coarse_reuse = PETSC_TRUE; 3978 } 3979 3980 /* count "active" (i.e. with positive local size) and "void" processes */ 3981 im_active = !!(pcis->n); 3982 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3983 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3984 void_procs = all_procs-active_procs; 3985 csin_type_simple = PETSC_TRUE; 3986 redist = PETSC_FALSE; 3987 if (pcbddc->current_level && void_procs) { 3988 csin_ml = PETSC_TRUE; 3989 ncoarse_ml = void_procs; 3990 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3991 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3992 csin_ds = PETSC_TRUE; 3993 ncoarse_ds = pcbddc->redistribute_coarse; 3994 redist = PETSC_TRUE; 3995 } else { 3996 csin_ds = PETSC_TRUE; 3997 ncoarse_ds = active_procs; 3998 redist = PETSC_TRUE; 3999 } 4000 } else { 4001 csin_ml = PETSC_FALSE; 4002 ncoarse_ml = all_procs; 4003 if (void_procs) { 4004 csin_ds = PETSC_TRUE; 4005 ncoarse_ds = void_procs; 4006 csin_type_simple = PETSC_FALSE; 4007 } else { 4008 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4009 csin_ds = PETSC_TRUE; 4010 ncoarse_ds = pcbddc->redistribute_coarse; 4011 redist = PETSC_TRUE; 4012 } else { 4013 csin_ds = PETSC_FALSE; 4014 ncoarse_ds = all_procs; 4015 } 4016 } 4017 } 4018 4019 /* 4020 test if we can go multilevel: three conditions must be satisfied: 4021 - we have not exceeded the number of levels requested 4022 - we can actually subassemble the active processes 4023 - we can find a suitable number of MPI processes where we can place the subassembled problem 4024 */ 4025 multilevel_allowed = PETSC_FALSE; 4026 multilevel_requested = PETSC_FALSE; 4027 if (pcbddc->current_level < pcbddc->max_levels) { 4028 multilevel_requested = PETSC_TRUE; 4029 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4030 multilevel_allowed = PETSC_FALSE; 4031 } else { 4032 multilevel_allowed = PETSC_TRUE; 4033 } 4034 } 4035 /* determine number of process partecipating to coarse solver */ 4036 if (multilevel_allowed) { 4037 ncoarse = ncoarse_ml; 4038 csin = csin_ml; 4039 redist = PETSC_FALSE; 4040 } else { 4041 ncoarse = ncoarse_ds; 4042 csin = csin_ds; 4043 } 4044 4045 /* creates temporary l2gmap and IS for coarse indexes */ 4046 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4047 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4048 4049 /* creates temporary MATIS object for coarse matrix */ 4050 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4051 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4052 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4053 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4054 #if 0 4055 { 4056 PetscViewer viewer; 4057 char filename[256]; 4058 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4059 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4060 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4061 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4062 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4063 } 4064 #endif 4065 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4066 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4067 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4068 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4069 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4070 4071 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4072 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4073 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4074 const PetscInt *idxs; 4075 ISLocalToGlobalMapping tmap; 4076 4077 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4078 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4079 /* allocate space for temporary storage */ 4080 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4081 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4082 /* allocate for IS array */ 4083 nisdofs = pcbddc->n_ISForDofsLocal; 4084 nisneu = !!pcbddc->NeumannBoundariesLocal; 4085 nis = nisdofs + nisneu; 4086 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4087 /* dofs splitting */ 4088 for (i=0;i<nisdofs;i++) { 4089 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4090 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4091 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4092 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4093 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4094 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4095 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4096 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4097 } 4098 /* neumann boundaries */ 4099 if (pcbddc->NeumannBoundariesLocal) { 4100 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4101 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4102 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4103 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4104 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4105 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4106 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4107 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4108 } 4109 /* free memory */ 4110 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4111 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4112 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4113 } else { 4114 nis = 0; 4115 nisdofs = 0; 4116 nisneu = 0; 4117 isarray = NULL; 4118 } 4119 /* destroy no longer needed map */ 4120 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4121 4122 /* restrict on coarse candidates (if needed) */ 4123 coarse_mat_is = NULL; 4124 if (csin) { 4125 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4126 if (redist) { 4127 PetscMPIInt rank; 4128 PetscInt spc,n_spc_p1,dest[1],destsize; 4129 4130 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4131 spc = active_procs/ncoarse; 4132 n_spc_p1 = active_procs%ncoarse; 4133 if (im_active) { 4134 destsize = 1; 4135 if (rank > n_spc_p1*(spc+1)-1) { 4136 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4137 } else { 4138 dest[0] = rank/(spc+1); 4139 } 4140 } else { 4141 destsize = 0; 4142 } 4143 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4144 } else if (csin_type_simple) { 4145 PetscMPIInt rank; 4146 PetscInt issize,isidx; 4147 4148 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4149 if (im_active) { 4150 issize = 1; 4151 isidx = (PetscInt)rank; 4152 } else { 4153 issize = 0; 4154 isidx = -1; 4155 } 4156 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4157 } else { /* get a suitable subassembling pattern from MATIS code */ 4158 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4159 } 4160 4161 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4162 if (!redist || ncoarse <= void_procs) { 4163 PetscInt ncoarse_cand,tissize,*nisindices; 4164 PetscInt *coarse_candidates; 4165 const PetscInt* tisindices; 4166 4167 /* get coarse candidates' ranks in pc communicator */ 4168 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4169 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4170 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4171 if (!coarse_candidates[i]) { 4172 coarse_candidates[ncoarse_cand++]=i; 4173 } 4174 } 4175 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4176 4177 4178 if (pcbddc->dbg_flag) { 4179 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4180 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4181 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4182 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4183 for (i=0;i<ncoarse_cand;i++) { 4184 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4185 } 4186 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4187 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4188 } 4189 /* shift the pattern on coarse candidates */ 4190 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4191 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4192 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4193 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4194 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4195 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4196 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4197 } 4198 if (pcbddc->dbg_flag) { 4199 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4200 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4201 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4202 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4203 } 4204 } 4205 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4206 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4207 } else { 4208 if (pcbddc->dbg_flag) { 4209 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4210 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4211 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4212 } 4213 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4214 coarse_mat_is = t_coarse_mat_is; 4215 } 4216 4217 /* create local to global scatters for coarse problem */ 4218 if (compute_vecs) { 4219 PetscInt lrows; 4220 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4221 if (coarse_mat_is) { 4222 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4223 } else { 4224 lrows = 0; 4225 } 4226 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4227 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4228 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4229 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4230 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4231 } 4232 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4233 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4234 4235 /* set defaults for coarse KSP and PC */ 4236 if (multilevel_allowed) { 4237 coarse_ksp_type = KSPRICHARDSON; 4238 coarse_pc_type = PCBDDC; 4239 } else { 4240 coarse_ksp_type = KSPPREONLY; 4241 coarse_pc_type = PCREDUNDANT; 4242 } 4243 4244 /* print some info if requested */ 4245 if (pcbddc->dbg_flag) { 4246 if (!multilevel_allowed) { 4247 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4248 if (multilevel_requested) { 4249 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4250 } else if (pcbddc->max_levels) { 4251 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4252 } 4253 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4254 } 4255 } 4256 4257 /* create the coarse KSP object only once with defaults */ 4258 if (coarse_mat_is) { 4259 MatReuse coarse_mat_reuse; 4260 PetscViewer dbg_viewer = NULL; 4261 if (pcbddc->dbg_flag) { 4262 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4263 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4264 } 4265 if (!pcbddc->coarse_ksp) { 4266 char prefix[256],str_level[16]; 4267 size_t len; 4268 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4269 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4270 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4271 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4272 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4273 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4274 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4275 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4276 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4277 /* prefix */ 4278 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4279 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4280 if (!pcbddc->current_level) { 4281 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4282 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4283 } else { 4284 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4285 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4286 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4287 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4288 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4289 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4290 } 4291 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4292 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4293 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4294 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4295 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4296 /* allow user customization */ 4297 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4298 } 4299 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4300 if (nisdofs) { 4301 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4302 for (i=0;i<nisdofs;i++) { 4303 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4304 } 4305 } 4306 if (nisneu) { 4307 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4308 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4309 } 4310 4311 /* get some info after set from options */ 4312 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4313 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4314 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4315 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4316 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4317 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4318 isbddc = PETSC_FALSE; 4319 } 4320 if (isredundant) { 4321 KSP inner_ksp; 4322 PC inner_pc; 4323 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4324 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4325 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4326 } 4327 4328 /* assemble coarse matrix */ 4329 if (coarse_reuse) { 4330 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4331 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4332 coarse_mat_reuse = MAT_REUSE_MATRIX; 4333 } else { 4334 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4335 } 4336 if (isbddc || isnn) { 4337 if (pcbddc->coarsening_ratio > 1) { 4338 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4339 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4340 if (pcbddc->dbg_flag) { 4341 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4342 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4343 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4344 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4345 } 4346 } 4347 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4348 } else { 4349 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4350 coarse_mat = coarse_mat_is; 4351 } 4352 } else { 4353 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4354 } 4355 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4356 4357 /* propagate symmetry info to coarse matrix */ 4358 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4359 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4360 4361 /* set operators */ 4362 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4363 if (pcbddc->dbg_flag) { 4364 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4365 } 4366 } else { /* processes non partecipating to coarse solver (if any) */ 4367 coarse_mat = 0; 4368 } 4369 ierr = PetscFree(isarray);CHKERRQ(ierr); 4370 #if 0 4371 { 4372 PetscViewer viewer; 4373 char filename[256]; 4374 sprintf(filename,"coarse_mat.m"); 4375 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4376 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4377 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4378 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4379 } 4380 #endif 4381 4382 /* Compute coarse null space (special handling by BDDC only) */ 4383 if (pcbddc->NullSpace) { 4384 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4385 } 4386 4387 if (pcbddc->coarse_ksp) { 4388 Vec crhs,csol; 4389 PetscBool ispreonly; 4390 if (CoarseNullSpace) { 4391 if (isbddc) { 4392 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4393 } else { 4394 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4395 } 4396 } 4397 /* setup coarse ksp */ 4398 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4399 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4400 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4401 /* hack */ 4402 if (!csol) { 4403 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4404 } 4405 if (!crhs) { 4406 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4407 } 4408 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4409 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4410 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4411 KSP check_ksp; 4412 KSPType check_ksp_type; 4413 PC check_pc; 4414 Vec check_vec,coarse_vec; 4415 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4416 PetscInt its; 4417 PetscBool compute_eigs; 4418 PetscReal *eigs_r,*eigs_c; 4419 PetscInt neigs; 4420 const char *prefix; 4421 4422 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4423 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4424 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4425 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4426 if (ispreonly) { 4427 check_ksp_type = KSPPREONLY; 4428 compute_eigs = PETSC_FALSE; 4429 } else { 4430 check_ksp_type = KSPGMRES; 4431 compute_eigs = PETSC_TRUE; 4432 } 4433 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4434 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4435 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4436 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4437 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4438 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4439 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4440 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4441 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4442 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4443 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4444 /* create random vec */ 4445 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4446 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4447 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4448 if (CoarseNullSpace) { 4449 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4450 } 4451 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4452 /* solve coarse problem */ 4453 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4454 if (CoarseNullSpace) { 4455 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4456 } 4457 /* set eigenvalue estimation if preonly has not been requested */ 4458 if (compute_eigs) { 4459 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4460 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4461 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4462 lambda_max = eigs_r[neigs-1]; 4463 lambda_min = eigs_r[0]; 4464 if (pcbddc->use_coarse_estimates) { 4465 if (lambda_max>lambda_min) { 4466 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4467 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4468 } 4469 } 4470 } 4471 4472 /* check coarse problem residual error */ 4473 if (pcbddc->dbg_flag) { 4474 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4475 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4476 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4477 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4478 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4479 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4480 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4481 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4482 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4483 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4484 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4485 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4486 if (compute_eigs) { 4487 PetscReal lambda_max_s,lambda_min_s; 4488 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4489 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4490 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4491 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4492 for (i=0;i<neigs;i++) { 4493 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4494 } 4495 } 4496 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4497 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4498 } 4499 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4500 if (compute_eigs) { 4501 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4502 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4503 } 4504 } 4505 } 4506 /* print additional info */ 4507 if (pcbddc->dbg_flag) { 4508 /* waits until all processes reaches this point */ 4509 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4510 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4511 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4512 } 4513 4514 /* free memory */ 4515 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4516 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4517 PetscFunctionReturn(0); 4518 } 4519 4520 #undef __FUNCT__ 4521 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4522 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4523 { 4524 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4525 PC_IS* pcis = (PC_IS*)pc->data; 4526 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4527 PetscInt i,local_size,coarse_size=0; 4528 PetscInt *local_primal_indices=NULL; 4529 PetscInt *t_local_primal_indices=NULL; 4530 PetscErrorCode ierr; 4531 4532 PetscFunctionBegin; 4533 /* Compute global number of coarse dofs */ 4534 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4535 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4536 } 4537 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr); 4538 ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr); 4539 local_size = 0; 4540 for (i=0;i<pcbddc->local_primal_size_cc;i++) { 4541 PetscInt j; 4542 for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j; 4543 } 4544 ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr); 4545 if (local_size != pcbddc->local_primal_size) { 4546 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4547 } 4548 4549 /* check numbering */ 4550 if (pcbddc->dbg_flag) { 4551 PetscScalar coarsesum,*array; 4552 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4553 4554 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4555 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4556 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4557 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4558 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4559 for (i=0;i<pcbddc->local_primal_size;i++) { 4560 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4561 } 4562 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4563 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4564 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4565 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4566 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4567 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4568 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4569 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4570 for (i=0;i<pcis->n;i++) { 4571 if (array[i] == 1.0) { 4572 set_error = PETSC_TRUE; 4573 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4574 } 4575 } 4576 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4577 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4578 for (i=0;i<pcis->n;i++) { 4579 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4580 } 4581 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4582 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4583 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4584 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4585 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4586 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4587 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4588 PetscInt *gidxs; 4589 4590 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4591 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4592 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4593 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4594 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4595 for (i=0;i<pcbddc->local_primal_size;i++) { 4596 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4597 } 4598 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4599 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4600 } 4601 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4602 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4603 } 4604 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4605 /* get back data */ 4606 *coarse_size_n = coarse_size; 4607 *local_primal_indices_n = local_primal_indices; 4608 PetscFunctionReturn(0); 4609 } 4610 4611 #undef __FUNCT__ 4612 #define __FUNCT__ "PCBDDCGlobalToLocal" 4613 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4614 { 4615 IS localis_t; 4616 PetscInt i,lsize,*idxs,n; 4617 PetscScalar *vals; 4618 PetscErrorCode ierr; 4619 4620 PetscFunctionBegin; 4621 /* get indices in local ordering exploiting local to global map */ 4622 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4623 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4624 for (i=0;i<lsize;i++) vals[i] = 1.0; 4625 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4626 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4627 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4628 if (idxs) { /* multilevel guard */ 4629 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4630 } 4631 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4632 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4633 ierr = PetscFree(vals);CHKERRQ(ierr); 4634 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4635 /* now compute set in local ordering */ 4636 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4637 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4638 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4639 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4640 for (i=0,lsize=0;i<n;i++) { 4641 if (PetscRealPart(vals[i]) > 0.5) { 4642 lsize++; 4643 } 4644 } 4645 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4646 for (i=0,lsize=0;i<n;i++) { 4647 if (PetscRealPart(vals[i]) > 0.5) { 4648 idxs[lsize++] = i; 4649 } 4650 } 4651 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4652 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4653 *localis = localis_t; 4654 PetscFunctionReturn(0); 4655 } 4656 4657 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4658 #undef __FUNCT__ 4659 #define __FUNCT__ "PCBDDCMatMult_Private" 4660 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4661 { 4662 PCBDDCChange_ctx change_ctx; 4663 PetscErrorCode ierr; 4664 4665 PetscFunctionBegin; 4666 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4667 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4668 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4669 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4670 PetscFunctionReturn(0); 4671 } 4672 4673 #undef __FUNCT__ 4674 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4675 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4676 { 4677 PCBDDCChange_ctx change_ctx; 4678 PetscErrorCode ierr; 4679 4680 PetscFunctionBegin; 4681 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4682 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4683 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4684 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4685 PetscFunctionReturn(0); 4686 } 4687 4688 #undef __FUNCT__ 4689 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4690 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4691 { 4692 PC_IS *pcis=(PC_IS*)pc->data; 4693 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4694 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4695 Mat S_j; 4696 PetscInt *used_xadj,*used_adjncy; 4697 PetscBool free_used_adj; 4698 PetscErrorCode ierr; 4699 4700 PetscFunctionBegin; 4701 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4702 free_used_adj = PETSC_FALSE; 4703 if (pcbddc->sub_schurs_layers == -1) { 4704 used_xadj = NULL; 4705 used_adjncy = NULL; 4706 } else { 4707 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4708 used_xadj = pcbddc->mat_graph->xadj; 4709 used_adjncy = pcbddc->mat_graph->adjncy; 4710 } else if (pcbddc->computed_rowadj) { 4711 used_xadj = pcbddc->mat_graph->xadj; 4712 used_adjncy = pcbddc->mat_graph->adjncy; 4713 } else { 4714 PetscBool flg_row=PETSC_FALSE; 4715 const PetscInt *xadj,*adjncy; 4716 PetscInt nvtxs; 4717 4718 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4719 if (flg_row) { 4720 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4721 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4722 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4723 free_used_adj = PETSC_TRUE; 4724 } else { 4725 pcbddc->sub_schurs_layers = -1; 4726 used_xadj = NULL; 4727 used_adjncy = NULL; 4728 } 4729 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4730 } 4731 } 4732 4733 /* setup sub_schurs data */ 4734 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4735 if (!sub_schurs->use_mumps) { 4736 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4737 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4738 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4739 } else { 4740 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4741 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4742 } 4743 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4744 4745 /* free adjacency */ 4746 if (free_used_adj) { 4747 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4748 } 4749 PetscFunctionReturn(0); 4750 } 4751 4752 #undef __FUNCT__ 4753 #define __FUNCT__ "PCBDDCInitSubSchurs" 4754 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4755 { 4756 PC_IS *pcis=(PC_IS*)pc->data; 4757 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4758 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4759 PCBDDCGraph graph; 4760 PetscErrorCode ierr; 4761 4762 PetscFunctionBegin; 4763 /* attach interface graph for determining subsets */ 4764 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4765 IS verticesIS; 4766 4767 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4768 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4769 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4770 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4771 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4772 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4773 /* 4774 if (pcbddc->dbg_flag) { 4775 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4776 } 4777 */ 4778 } else { 4779 graph = pcbddc->mat_graph; 4780 } 4781 4782 /* sub_schurs init */ 4783 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4784 4785 /* free graph struct */ 4786 if (pcbddc->sub_schurs_rebuild) { 4787 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4788 } 4789 PetscFunctionReturn(0); 4790 } 4791