1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 10 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 const PetscInt *idxs; 14 PetscInt nz = 0; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 if (!pcbddc->zerodiag) { 19 PetscFunctionReturn(0); 20 } else { 21 ierr = ISGetLocalSize(pcbddc->zerodiag,&nz);CHKERRQ(ierr); 22 if (!nz) PetscFunctionReturn(0); 23 } 24 ierr = ISGetIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr); 25 /* TODO: add error checking 26 - avoid nested pop (or push) calls. 27 - cannot push before pop. 28 */ 29 if (pop) { 30 const PetscInt *cB0_cols; 31 PetscInt cB0_ncol; 32 const PetscScalar *cB0_vals; 33 34 /* extract B_0 */ 35 ierr = MatGetRow(pcbddc->local_mat,idxs[nz-1],&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 36 pcbddc->B0_ncol = cB0_ncol; 37 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 38 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 39 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 40 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 41 ierr = MatRestoreRow(pcbddc->local_mat,idxs[nz-1],&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 42 /* remove rows and cols from local problem */ 43 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 44 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,idxs+nz-1,1.,NULL,NULL);CHKERRQ(ierr); 45 } else { /* push */ 46 ierr = MatSetValues(pcbddc->local_mat,1,&idxs[nz-1],pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 47 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 48 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 49 } 50 PetscFunctionReturn(0); 51 } 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "PCBDDCAdaptiveSelection" 55 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 56 { 57 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 58 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 59 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 60 PetscBLASInt *B_iwork,*B_ifail; 61 PetscScalar *work,lwork; 62 PetscScalar *St,*S,*eigv; 63 PetscScalar *Sarray,*Starray; 64 PetscReal *eigs,thresh; 65 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 66 PetscBool allocated_S_St; 67 #if defined(PETSC_USE_COMPLEX) 68 PetscReal *rwork; 69 #endif 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 if (!sub_schurs->use_mumps) { 74 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 75 } 76 77 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 78 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 79 } 80 81 if (pcbddc->dbg_flag) { 82 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 83 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 84 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 85 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 86 } 87 88 if (pcbddc->dbg_flag) { 89 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 90 } 91 92 /* max size of subsets */ 93 mss = 0; 94 for (i=0;i<sub_schurs->n_subs;i++) { 95 PetscInt subset_size; 96 97 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 98 mss = PetscMax(mss,subset_size); 99 } 100 101 /* min/max and threshold */ 102 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 103 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 104 nmax = PetscMax(nmin,nmax); 105 allocated_S_St = PETSC_FALSE; 106 if (nmin) { 107 allocated_S_St = PETSC_TRUE; 108 } 109 110 /* allocate lapack workspace */ 111 cum = cum2 = 0; 112 maxneigs = 0; 113 for (i=0;i<sub_schurs->n_subs;i++) { 114 PetscInt n,subset_size; 115 116 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 117 n = PetscMin(subset_size,nmax); 118 cum += subset_size; 119 cum2 += subset_size*n; 120 maxneigs = PetscMax(maxneigs,n); 121 } 122 if (mss) { 123 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 124 PetscBLASInt B_itype = 1; 125 PetscBLASInt B_N = mss; 126 PetscReal zero = 0.0; 127 PetscReal eps = 0.0; /* dlamch? */ 128 129 B_lwork = -1; 130 S = NULL; 131 St = NULL; 132 eigs = NULL; 133 eigv = NULL; 134 B_iwork = NULL; 135 B_ifail = NULL; 136 #if defined(PETSC_USE_COMPLEX) 137 rwork = NULL; 138 #endif 139 thresh = 1.0; 140 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 141 #if defined(PETSC_USE_COMPLEX) 142 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 143 #else 144 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 145 #endif 146 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 147 ierr = PetscFPTrapPop();CHKERRQ(ierr); 148 } else { 149 /* TODO */ 150 } 151 } else { 152 lwork = 0; 153 } 154 155 nv = 0; 156 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 157 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 158 } 159 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 160 if (allocated_S_St) { 161 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 162 } 163 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 164 #if defined(PETSC_USE_COMPLEX) 165 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 166 #endif 167 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 168 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 169 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 170 nv+cum,&pcbddc->adaptive_constraints_idxs, 171 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 172 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 173 174 maxneigs = 0; 175 cum = cum2 = cumarray = 0; 176 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 177 pcbddc->adaptive_constraints_data_ptr[0] = 0; 178 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 179 const PetscInt *idxs; 180 181 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 182 for (cum=0;cum<nv;cum++) { 183 pcbddc->adaptive_constraints_n[cum] = 1; 184 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 185 pcbddc->adaptive_constraints_data[cum] = 1.0; 186 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 187 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 188 } 189 cum2 = cum; 190 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 191 } 192 193 if (mss) { /* multilevel */ 194 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 195 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 196 } 197 198 for (i=0;i<sub_schurs->n_subs;i++) { 199 200 const PetscInt *idxs; 201 PetscReal infty = PETSC_MAX_REAL; 202 PetscInt j,subset_size,eigs_start = 0; 203 PetscBLASInt B_N; 204 PetscBool same_data = PETSC_FALSE; 205 206 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 207 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 208 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 209 if (sub_schurs->is_hermitian) { 210 PetscInt j,k; 211 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 212 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 213 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 214 } 215 for (j=0;j<subset_size;j++) { 216 for (k=j;k<subset_size;k++) { 217 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 218 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 219 } 220 } 221 } else { 222 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 223 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 224 } 225 } else { 226 S = Sarray + cumarray; 227 St = Starray + cumarray; 228 } 229 230 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 231 /* see if we can save some work */ 232 if (sub_schurs->n_subs == 1) { 233 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 234 } 235 236 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 237 B_neigs = 0; 238 } else { 239 /* Threshold: this is an heuristic for edges */ 240 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 241 242 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 243 PetscBLASInt B_itype = 1; 244 PetscBLASInt B_IL, B_IU; 245 PetscReal eps = -1.0; /* dlamch? */ 246 PetscInt nmin_s; 247 248 /* ask for eigenvalues larger than thresh */ 249 if (pcbddc->dbg_flag) { 250 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 251 } 252 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 253 #if defined(PETSC_USE_COMPLEX) 254 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 255 #else 256 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 257 #endif 258 ierr = PetscFPTrapPop();CHKERRQ(ierr); 259 if (B_ierr) { 260 if (B_ierr < 0 ) { 261 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 262 } else if (B_ierr <= B_N) { 263 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 264 } else { 265 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 266 } 267 } 268 269 if (B_neigs > nmax) { 270 if (pcbddc->dbg_flag) { 271 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 272 } 273 eigs_start = B_neigs -nmax; 274 B_neigs = nmax; 275 } 276 277 nmin_s = PetscMin(nmin,B_N); 278 if (B_neigs < nmin_s) { 279 PetscBLASInt B_neigs2; 280 281 B_IU = B_N - B_neigs; 282 B_IL = B_N - nmin_s + 1; 283 if (pcbddc->dbg_flag) { 284 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 285 } 286 if (sub_schurs->is_hermitian) { 287 PetscInt j; 288 for (j=0;j<subset_size;j++) { 289 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 290 } 291 for (j=0;j<subset_size;j++) { 292 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 293 } 294 } else { 295 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 296 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 297 } 298 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 299 #if defined(PETSC_USE_COMPLEX) 300 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 301 #else 302 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 303 #endif 304 ierr = PetscFPTrapPop();CHKERRQ(ierr); 305 B_neigs += B_neigs2; 306 } 307 if (B_ierr) { 308 if (B_ierr < 0 ) { 309 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 310 } else if (B_ierr <= B_N) { 311 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 312 } else { 313 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 314 } 315 } 316 if (pcbddc->dbg_flag) { 317 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 318 for (j=0;j<B_neigs;j++) { 319 if (eigs[j] == 0.0) { 320 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 321 } else { 322 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 323 } 324 } 325 } 326 } else { 327 /* TODO */ 328 } 329 } 330 maxneigs = PetscMax(B_neigs,maxneigs); 331 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 332 if (B_neigs) { 333 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 334 335 if (pcbddc->dbg_flag > 1) { 336 PetscInt ii; 337 for (ii=0;ii<B_neigs;ii++) { 338 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 339 for (j=0;j<B_N;j++) { 340 #if defined(PETSC_USE_COMPLEX) 341 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 342 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 343 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 344 #else 345 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 346 #endif 347 } 348 } 349 } 350 #if 0 351 for (j=0;j<B_neigs;j++) { 352 PetscBLASInt Blas_N,Blas_one = 1.0; 353 PetscScalar norm; 354 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 355 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 356 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 357 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 358 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 359 } else { 360 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 361 } 362 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 363 } 364 #endif 365 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 366 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 367 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 368 cum++; 369 } 370 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 371 /* shift for next computation */ 372 cumarray += subset_size*subset_size; 373 } 374 if (pcbddc->dbg_flag) { 375 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 376 } 377 378 if (mss) { 379 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 380 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 381 /* destroy matrices (junk) */ 382 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 383 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 384 } 385 if (allocated_S_St) { 386 ierr = PetscFree2(S,St);CHKERRQ(ierr); 387 } 388 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 389 #if defined(PETSC_USE_COMPLEX) 390 ierr = PetscFree(rwork);CHKERRQ(ierr); 391 #endif 392 if (pcbddc->dbg_flag) { 393 PetscInt maxneigs_r; 394 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 395 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 396 } 397 PetscFunctionReturn(0); 398 } 399 400 #undef __FUNCT__ 401 #define __FUNCT__ "PCBDDCSetUpSolvers" 402 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 403 { 404 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 405 PetscScalar *coarse_submat_vals; 406 PetscErrorCode ierr; 407 408 PetscFunctionBegin; 409 /* Setup local scatters R_to_B and (optionally) R_to_D */ 410 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 411 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 412 413 /* Setup local neumann solver ksp_R */ 414 /* PCBDDCSetUpLocalScatters should be called first! */ 415 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 416 417 /* Change global null space passed in by the user if change of basis has been requested */ 418 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 419 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 420 } 421 422 /* 423 Setup local correction and local part of coarse basis. 424 Gives back the dense local part of the coarse matrix in column major ordering 425 */ 426 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 427 428 /* Compute total number of coarse nodes and setup coarse solver */ 429 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 430 431 /* free */ 432 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 433 PetscFunctionReturn(0); 434 } 435 436 #undef __FUNCT__ 437 #define __FUNCT__ "PCBDDCResetCustomization" 438 PetscErrorCode PCBDDCResetCustomization(PC pc) 439 { 440 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 441 PetscErrorCode ierr; 442 443 PetscFunctionBegin; 444 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 445 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 446 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 447 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 448 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 449 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 450 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 451 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 452 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 453 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 454 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 458 #undef __FUNCT__ 459 #define __FUNCT__ "PCBDDCResetTopography" 460 PetscErrorCode PCBDDCResetTopography(PC pc) 461 { 462 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 463 PetscErrorCode ierr; 464 465 PetscFunctionBegin; 466 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 467 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 468 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 469 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 470 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "PCBDDCResetSolvers" 476 PetscErrorCode PCBDDCResetSolvers(PC pc) 477 { 478 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 479 PetscScalar *array; 480 PetscErrorCode ierr; 481 482 PetscFunctionBegin; 483 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 484 if (pcbddc->coarse_phi_B) { 485 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 486 ierr = PetscFree(array);CHKERRQ(ierr); 487 } 488 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 489 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 490 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 491 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 492 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 493 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 494 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 495 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 496 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 497 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 498 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 499 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 500 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 501 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 502 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 503 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 504 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 505 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 506 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 507 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 508 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 509 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 510 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 511 ierr = ISDestroy(&pcbddc->zerodiag);CHKERRQ(ierr); 512 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 513 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 519 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 520 { 521 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 522 PC_IS *pcis = (PC_IS*)pc->data; 523 VecType impVecType; 524 PetscInt n_constraints,n_R,old_size; 525 PetscErrorCode ierr; 526 527 PetscFunctionBegin; 528 if (!pcbddc->ConstraintMatrix) { 529 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 530 } 531 /* get sizes */ 532 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 533 n_R = pcis->n-pcbddc->n_vertices; 534 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 535 /* local work vectors (try to avoid unneeded work)*/ 536 /* R nodes */ 537 old_size = -1; 538 if (pcbddc->vec1_R) { 539 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 540 } 541 if (n_R != old_size) { 542 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 543 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 544 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 545 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 546 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 547 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 548 } 549 /* local primal dofs */ 550 old_size = -1; 551 if (pcbddc->vec1_P) { 552 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 553 } 554 if (pcbddc->local_primal_size != old_size) { 555 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 556 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 557 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 558 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 559 } 560 /* local explicit constraints */ 561 old_size = -1; 562 if (pcbddc->vec1_C) { 563 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 564 } 565 if (n_constraints && n_constraints != old_size) { 566 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 567 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 568 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 569 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 570 } 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "PCBDDCSetUpCorrection" 576 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 577 { 578 PetscErrorCode ierr; 579 /* pointers to pcis and pcbddc */ 580 PC_IS* pcis = (PC_IS*)pc->data; 581 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 582 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 583 /* submatrices of local problem */ 584 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 585 /* submatrices of local coarse problem */ 586 Mat S_VV,S_CV,S_VC,S_CC; 587 /* working matrices */ 588 Mat C_CR; 589 /* additional working stuff */ 590 PC pc_R; 591 Mat F; 592 PetscBool isLU,isCHOL,isILU; 593 594 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 595 PetscScalar *work; 596 PetscInt *idx_V_B; 597 PetscInt n,n_vertices,n_constraints; 598 PetscInt i,n_R,n_D,n_B; 599 PetscBool unsymmetric_check; 600 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 601 MatType impMatType; 602 /* some shortcuts to scalars */ 603 PetscScalar one=1.0,m_one=-1.0; 604 605 PetscFunctionBegin; 606 n_vertices = pcbddc->n_vertices; 607 n_constraints = pcbddc->local_primal_size-n_vertices; 608 /* Set Non-overlapping dimensions */ 609 n_B = pcis->n_B; 610 n_D = pcis->n - n_B; 611 n_R = pcis->n - n_vertices; 612 613 /* Set types for local objects needed by BDDC precondtioner */ 614 impMatType = MATSEQDENSE; 615 616 /* vertices in boundary numbering */ 617 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 618 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 619 if (i != n_vertices) { 620 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 621 } 622 623 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 624 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 625 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 626 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 627 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 628 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 629 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 630 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 631 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 632 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 633 634 unsymmetric_check = PETSC_FALSE; 635 /* allocate workspace */ 636 n = 0; 637 if (n_constraints) { 638 n += n_R*n_constraints; 639 } 640 if (n_vertices) { 641 n = PetscMax(2*n_R*n_vertices,n); 642 n = PetscMax((n_R+n_B)*n_vertices,n); 643 } 644 if (!pcbddc->symmetric_primal) { 645 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 646 unsymmetric_check = PETSC_TRUE; 647 } 648 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 649 650 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 651 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 652 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 653 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 654 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 655 if (isLU || isILU || isCHOL) { 656 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 657 } else if (sub_schurs->reuse_mumps) { 658 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 659 MatFactorType type; 660 661 F = reuse_mumps->F; 662 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 663 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 664 } else { 665 F = NULL; 666 } 667 668 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 669 if (n_constraints) { 670 Mat M1,M2,M3; 671 Mat auxmat; 672 IS is_aux; 673 PetscScalar *array,*array2; 674 675 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 676 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 677 678 /* Extract constraints on R nodes: C_{CR} */ 679 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 680 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 681 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 682 683 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 684 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 685 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 686 for (i=0;i<n_constraints;i++) { 687 const PetscScalar *row_cmat_values; 688 const PetscInt *row_cmat_indices; 689 PetscInt size_of_constraint,j; 690 691 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 692 for (j=0;j<size_of_constraint;j++) { 693 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 694 } 695 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 696 } 697 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 698 if (F) { 699 Mat B; 700 701 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 702 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 703 ierr = MatDestroy(&B);CHKERRQ(ierr); 704 } else { 705 PetscScalar *marr; 706 707 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 708 for (i=0;i<n_constraints;i++) { 709 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 710 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 711 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 712 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 713 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 714 } 715 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 716 } 717 if (!pcbddc->switch_static) { 718 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 719 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 720 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 721 for (i=0;i<n_constraints;i++) { 722 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 723 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 724 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 726 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 727 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 728 } 729 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 730 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 731 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 732 } else { 733 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 734 pcbddc->local_auxmat2 = local_auxmat2_R; 735 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 736 } 737 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 738 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 739 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 740 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 741 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 742 if (isCHOL) { 743 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 744 } else { 745 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 746 } 747 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 748 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 749 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 750 ierr = MatDestroy(&M2);CHKERRQ(ierr); 751 ierr = MatDestroy(&M3);CHKERRQ(ierr); 752 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 753 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 754 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 755 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 756 ierr = MatDestroy(&M1);CHKERRQ(ierr); 757 } 758 /* Get submatrices from subdomain matrix */ 759 if (n_vertices) { 760 IS is_aux; 761 762 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 763 IS tis; 764 765 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 766 ierr = ISSort(tis);CHKERRQ(ierr); 767 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 768 ierr = ISDestroy(&tis);CHKERRQ(ierr); 769 } else { 770 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 771 } 772 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 773 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 774 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 775 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 776 } 777 778 /* Matrix of coarse basis functions (local) */ 779 if (pcbddc->coarse_phi_B) { 780 PetscInt on_B,on_primal,on_D=n_D; 781 if (pcbddc->coarse_phi_D) { 782 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 783 } 784 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 785 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 786 PetscScalar *marray; 787 788 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 789 ierr = PetscFree(marray);CHKERRQ(ierr); 790 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 791 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 792 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 793 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 794 } 795 } 796 797 if (!pcbddc->coarse_phi_B) { 798 PetscScalar *marray; 799 800 n = n_B*pcbddc->local_primal_size; 801 if (pcbddc->switch_static || pcbddc->dbg_flag) { 802 n += n_D*pcbddc->local_primal_size; 803 } 804 if (!pcbddc->symmetric_primal) { 805 n *= 2; 806 } 807 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 808 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 809 n = n_B*pcbddc->local_primal_size; 810 if (pcbddc->switch_static || pcbddc->dbg_flag) { 811 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 812 n += n_D*pcbddc->local_primal_size; 813 } 814 if (!pcbddc->symmetric_primal) { 815 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 816 if (pcbddc->switch_static || pcbddc->dbg_flag) { 817 n = n_B*pcbddc->local_primal_size; 818 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 819 } 820 } else { 821 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 822 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 823 if (pcbddc->switch_static || pcbddc->dbg_flag) { 824 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 825 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 826 } 827 } 828 } 829 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 830 /* vertices */ 831 if (n_vertices) { 832 833 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 834 835 if (n_R) { 836 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 837 PetscBLASInt B_N,B_one = 1; 838 PetscScalar *x,*y; 839 PetscBool isseqaij; 840 841 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 842 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 843 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 844 if (F) { /* TODO could be optimized for symmetric problems */ 845 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 846 } else { 847 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 848 for (i=0;i<n_vertices;i++) { 849 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 850 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 851 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 852 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 853 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 854 } 855 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 856 } 857 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 858 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 859 if (n_constraints) { 860 Mat B; 861 862 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 863 for (i=0;i<n_vertices;i++) { 864 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 865 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 866 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 869 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 870 } 871 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 872 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 873 ierr = MatDestroy(&B);CHKERRQ(ierr); 874 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 875 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 876 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 877 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 878 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 879 ierr = MatDestroy(&B);CHKERRQ(ierr); 880 } 881 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 882 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 883 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 884 } 885 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 886 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 887 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 888 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 889 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 890 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 891 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 892 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 893 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 894 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 895 } else { 896 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 897 } 898 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 899 /* coarse basis functions */ 900 for (i=0;i<n_vertices;i++) { 901 PetscScalar *y; 902 903 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 904 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 905 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 906 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 907 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 y[n_B*i+idx_V_B[i]] = 1.0; 909 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 910 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 911 912 if (pcbddc->switch_static || pcbddc->dbg_flag) { 913 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 914 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 915 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 918 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 919 } 920 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 921 } 922 /* if n_R == 0 the object is not destroyed */ 923 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 924 } 925 926 if (n_constraints) { 927 Mat B; 928 929 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 930 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 931 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 932 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 933 if (n_vertices) { 934 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 935 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 936 } else { 937 Mat S_VCt; 938 939 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 940 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 941 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 942 } 943 } 944 ierr = MatDestroy(&B);CHKERRQ(ierr); 945 /* coarse basis functions */ 946 for (i=0;i<n_constraints;i++) { 947 PetscScalar *y; 948 949 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 950 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 951 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 952 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 953 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 954 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 955 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 956 if (pcbddc->switch_static || pcbddc->dbg_flag) { 957 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 958 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 959 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 960 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 961 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 962 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 963 } 964 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 965 } 966 } 967 if (n_constraints) { 968 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 969 } 970 971 /* compute other basis functions for non-symmetric problems */ 972 if (!pcbddc->symmetric_primal) { 973 974 if (n_constraints) { 975 Mat S_CCT,B_C; 976 977 /* this is a lazy thing */ 978 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 979 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 980 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 981 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 982 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 983 if (n_vertices) { 984 Mat B_V,S_VCT; 985 986 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 987 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 988 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 989 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 990 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 991 } 992 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 993 } else { /* if there are no constraints, reset work */ 994 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 995 } 996 if (n_vertices && n_R) { 997 Mat A_VRT; 998 PetscScalar *marray; 999 PetscBLASInt B_N,B_one = 1; 1000 1001 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1002 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1003 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1004 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1005 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1006 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1007 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1008 } 1009 1010 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1011 for (i=0;i<pcbddc->local_primal_size;i++) { 1012 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1013 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1014 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1015 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1016 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1017 } 1018 } else { 1019 for (i=0;i<pcbddc->local_primal_size;i++) { 1020 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1021 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1022 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1023 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1024 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1025 } 1026 } 1027 /* coarse basis functions */ 1028 for (i=0;i<pcbddc->local_primal_size;i++) { 1029 PetscScalar *y; 1030 1031 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1032 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1033 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1034 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 if (i<n_vertices) { 1037 y[n_B*i+idx_V_B[i]] = 1.0; 1038 } 1039 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1040 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1041 1042 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1043 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1044 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1045 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1046 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1047 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1048 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1049 } 1050 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1051 } 1052 } 1053 /* free memory */ 1054 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1055 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1056 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1057 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1058 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1059 ierr = PetscFree(work);CHKERRQ(ierr); 1060 if (n_vertices) { 1061 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1062 } 1063 if (n_constraints) { 1064 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1065 } 1066 /* Checking coarse_sub_mat and coarse basis functios */ 1067 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1068 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1069 if (pcbddc->dbg_flag) { 1070 Mat coarse_sub_mat; 1071 Mat AUXMAT,TM1,TM2,TM3,TM4; 1072 Mat coarse_phi_D,coarse_phi_B; 1073 Mat coarse_psi_D,coarse_psi_B; 1074 Mat A_II,A_BB,A_IB,A_BI; 1075 Mat C_B,CPHI; 1076 IS is_dummy; 1077 Vec mones; 1078 MatType checkmattype=MATSEQAIJ; 1079 PetscReal real_value; 1080 1081 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1082 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1083 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1084 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1085 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1086 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1087 if (unsymmetric_check) { 1088 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1089 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1090 } 1091 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1092 1093 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1094 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1095 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1096 if (unsymmetric_check) { 1097 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1098 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1099 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1100 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1101 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1102 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1103 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1104 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1105 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1106 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1107 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1108 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1109 } else { 1110 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1111 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1112 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1113 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1114 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1115 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1116 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1117 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1118 } 1119 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1120 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1121 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1122 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1123 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1124 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1125 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1126 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1127 1128 /* check constraints */ 1129 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1130 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1131 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1132 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1133 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1134 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1135 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1136 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1137 if (unsymmetric_check) { 1138 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1139 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1140 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1141 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1142 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1143 } 1144 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1145 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1146 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1147 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1148 1149 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1150 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1151 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1152 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1153 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1154 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1155 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1156 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1157 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1158 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1159 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1160 if (unsymmetric_check) { 1161 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1162 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1163 } 1164 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1165 } 1166 /* get back data */ 1167 *coarse_submat_vals_n = coarse_submat_vals; 1168 PetscFunctionReturn(0); 1169 } 1170 1171 #undef __FUNCT__ 1172 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1173 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1174 { 1175 Mat *work_mat; 1176 IS isrow_s,iscol_s; 1177 PetscBool rsorted,csorted; 1178 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1179 PetscErrorCode ierr; 1180 1181 PetscFunctionBegin; 1182 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1183 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1184 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1185 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1186 1187 if (!rsorted) { 1188 const PetscInt *idxs; 1189 PetscInt *idxs_sorted,i; 1190 1191 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1192 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1193 for (i=0;i<rsize;i++) { 1194 idxs_perm_r[i] = i; 1195 } 1196 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1197 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1198 for (i=0;i<rsize;i++) { 1199 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1200 } 1201 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1202 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1203 } else { 1204 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1205 isrow_s = isrow; 1206 } 1207 1208 if (!csorted) { 1209 if (isrow == iscol) { 1210 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1211 iscol_s = isrow_s; 1212 } else { 1213 const PetscInt *idxs; 1214 PetscInt *idxs_sorted,i; 1215 1216 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1217 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1218 for (i=0;i<csize;i++) { 1219 idxs_perm_c[i] = i; 1220 } 1221 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1222 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1223 for (i=0;i<csize;i++) { 1224 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1225 } 1226 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1227 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1228 } 1229 } else { 1230 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1231 iscol_s = iscol; 1232 } 1233 1234 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1235 1236 if (!rsorted || !csorted) { 1237 Mat new_mat; 1238 IS is_perm_r,is_perm_c; 1239 1240 if (!rsorted) { 1241 PetscInt *idxs_r,i; 1242 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1243 for (i=0;i<rsize;i++) { 1244 idxs_r[idxs_perm_r[i]] = i; 1245 } 1246 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1247 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1248 } else { 1249 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1250 } 1251 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1252 1253 if (!csorted) { 1254 if (isrow_s == iscol_s) { 1255 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1256 is_perm_c = is_perm_r; 1257 } else { 1258 PetscInt *idxs_c,i; 1259 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1260 for (i=0;i<csize;i++) { 1261 idxs_c[idxs_perm_c[i]] = i; 1262 } 1263 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1264 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1265 } 1266 } else { 1267 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1268 } 1269 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1270 1271 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1272 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1273 work_mat[0] = new_mat; 1274 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1275 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1276 } 1277 1278 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1279 *B = work_mat[0]; 1280 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1281 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1282 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1288 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1289 { 1290 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1291 PC_IS* pcis = (PC_IS*)pc->data; 1292 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1293 Mat new_mat; 1294 IS is_local,is_global; 1295 PetscInt local_size; 1296 PetscBool isseqaij; 1297 PetscErrorCode ierr; 1298 1299 PetscFunctionBegin; 1300 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1301 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1302 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1303 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1304 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1305 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1306 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1307 1308 /* check */ 1309 if (pcbddc->dbg_flag) { 1310 Vec x,x_change; 1311 PetscReal error; 1312 1313 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1314 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1315 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1316 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1317 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1318 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1319 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1320 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1321 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1322 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1323 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1324 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1325 ierr = VecDestroy(&x);CHKERRQ(ierr); 1326 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1327 } 1328 1329 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1330 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1331 if (isseqaij) { 1332 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1333 } else { 1334 Mat work_mat; 1335 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1336 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1337 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1338 } 1339 if (matis->A->symmetric_set) { 1340 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1341 #if !defined(PETSC_USE_COMPLEX) 1342 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1343 #endif 1344 } 1345 /* 1346 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1347 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1348 */ 1349 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1355 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1356 { 1357 PC_IS* pcis = (PC_IS*)(pc->data); 1358 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1359 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1360 PetscInt *idx_R_local=NULL; 1361 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1362 PetscInt vbs,bs; 1363 PetscBT bitmask=NULL; 1364 PetscErrorCode ierr; 1365 1366 PetscFunctionBegin; 1367 /* 1368 No need to setup local scatters if 1369 - primal space is unchanged 1370 AND 1371 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1372 AND 1373 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1374 */ 1375 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1376 PetscFunctionReturn(0); 1377 } 1378 /* destroy old objects */ 1379 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1380 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1381 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1382 /* Set Non-overlapping dimensions */ 1383 n_B = pcis->n_B; 1384 n_D = pcis->n - n_B; 1385 n_vertices = pcbddc->n_vertices; 1386 1387 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1388 1389 /* create auxiliary bitmask and allocate workspace */ 1390 if (!sub_schurs->reuse_mumps) { 1391 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1392 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1393 for (i=0;i<n_vertices;i++) { 1394 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1395 } 1396 1397 for (i=0, n_R=0; i<pcis->n; i++) { 1398 if (!PetscBTLookup(bitmask,i)) { 1399 idx_R_local[n_R++] = i; 1400 } 1401 } 1402 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1403 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1404 1405 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1406 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1407 } 1408 1409 /* Block code */ 1410 vbs = 1; 1411 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1412 if (bs>1 && !(n_vertices%bs)) { 1413 PetscBool is_blocked = PETSC_TRUE; 1414 PetscInt *vary; 1415 if (!sub_schurs->reuse_mumps) { 1416 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1417 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1418 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1419 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1420 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1421 for (i=0; i<pcis->n/bs; i++) { 1422 if (vary[i]!=0 && vary[i]!=bs) { 1423 is_blocked = PETSC_FALSE; 1424 break; 1425 } 1426 } 1427 ierr = PetscFree(vary);CHKERRQ(ierr); 1428 } else { 1429 /* Verify directly the R set */ 1430 for (i=0; i<n_R/bs; i++) { 1431 PetscInt j,node=idx_R_local[bs*i]; 1432 for (j=1; j<bs; j++) { 1433 if (node != idx_R_local[bs*i+j]-j) { 1434 is_blocked = PETSC_FALSE; 1435 break; 1436 } 1437 } 1438 } 1439 } 1440 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1441 vbs = bs; 1442 for (i=0;i<n_R/vbs;i++) { 1443 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1444 } 1445 } 1446 } 1447 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1448 if (sub_schurs->reuse_mumps) { 1449 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1450 1451 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1452 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1453 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1454 reuse_mumps->is_R = pcbddc->is_R_local; 1455 } else { 1456 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1457 } 1458 1459 /* print some info if requested */ 1460 if (pcbddc->dbg_flag) { 1461 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1462 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1463 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1465 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1467 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1468 } 1469 1470 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1471 if (!sub_schurs->reuse_mumps) { 1472 IS is_aux1,is_aux2; 1473 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1474 1475 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1476 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1477 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1478 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1479 for (i=0; i<n_D; i++) { 1480 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1481 } 1482 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1483 for (i=0, j=0; i<n_R; i++) { 1484 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1485 aux_array1[j++] = i; 1486 } 1487 } 1488 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1489 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1490 for (i=0, j=0; i<n_B; i++) { 1491 if (!PetscBTLookup(bitmask,is_indices[i])) { 1492 aux_array2[j++] = i; 1493 } 1494 } 1495 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1496 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1497 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1498 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1499 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1500 1501 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1502 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1503 for (i=0, j=0; i<n_R; i++) { 1504 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1505 aux_array1[j++] = i; 1506 } 1507 } 1508 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1509 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1510 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1511 } 1512 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1513 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1514 } else { 1515 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1516 IS tis; 1517 PetscInt schur_size; 1518 1519 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1520 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1521 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1522 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1523 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1524 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1525 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1526 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1527 } 1528 } 1529 PetscFunctionReturn(0); 1530 } 1531 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1535 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1536 { 1537 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1538 PC_IS *pcis = (PC_IS*)pc->data; 1539 PC pc_temp; 1540 Mat A_RR; 1541 MatReuse reuse; 1542 PetscScalar m_one = -1.0; 1543 PetscReal value; 1544 PetscInt n_D,n_R; 1545 PetscBool use_exact,use_exact_reduced,issbaij; 1546 PetscErrorCode ierr; 1547 /* prefixes stuff */ 1548 char dir_prefix[256],neu_prefix[256],str_level[16]; 1549 size_t len; 1550 1551 PetscFunctionBegin; 1552 1553 /* compute prefixes */ 1554 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1555 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1556 if (!pcbddc->current_level) { 1557 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1558 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1559 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1560 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1561 } else { 1562 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1563 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1564 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1565 len -= 15; /* remove "pc_bddc_coarse_" */ 1566 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1567 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1568 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1569 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1570 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1571 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1572 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1573 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1574 } 1575 1576 /* DIRICHLET PROBLEM */ 1577 if (dirichlet) { 1578 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1579 if (pcbddc->local_mat->symmetric_set) { 1580 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1581 } 1582 /* Matrix for Dirichlet problem is pcis->A_II */ 1583 n_D = pcis->n - pcis->n_B; 1584 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1585 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1586 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1587 /* default */ 1588 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1589 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1590 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1591 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1592 if (issbaij) { 1593 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1594 } else { 1595 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1596 } 1597 /* Allow user's customization */ 1598 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1599 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1600 } 1601 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1602 if (sub_schurs->reuse_mumps) { 1603 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1604 1605 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1606 } 1607 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1608 if (!n_D) { 1609 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1610 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1611 } 1612 /* Set Up KSP for Dirichlet problem of BDDC */ 1613 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1614 /* set ksp_D into pcis data */ 1615 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1616 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1617 pcis->ksp_D = pcbddc->ksp_D; 1618 } 1619 1620 /* NEUMANN PROBLEM */ 1621 A_RR = 0; 1622 if (neumann) { 1623 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1624 PetscInt ibs,mbs; 1625 PetscBool issbaij; 1626 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1627 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1628 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1629 if (pcbddc->ksp_R) { /* already created ksp */ 1630 PetscInt nn_R; 1631 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1632 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1633 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1634 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1635 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1636 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1637 reuse = MAT_INITIAL_MATRIX; 1638 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1639 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1640 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1641 reuse = MAT_INITIAL_MATRIX; 1642 } else { /* safe to reuse the matrix */ 1643 reuse = MAT_REUSE_MATRIX; 1644 } 1645 } 1646 /* last check */ 1647 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1648 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1649 reuse = MAT_INITIAL_MATRIX; 1650 } 1651 } else { /* first time, so we need to create the matrix */ 1652 reuse = MAT_INITIAL_MATRIX; 1653 } 1654 /* extract A_RR */ 1655 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1656 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1657 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1658 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1659 if (matis->A == pcbddc->local_mat) { 1660 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1661 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1662 } else { 1663 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1664 } 1665 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1666 if (matis->A == pcbddc->local_mat) { 1667 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1668 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1669 } else { 1670 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1671 } 1672 } 1673 if (!sub_schurs->reuse_mumps) { 1674 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1675 if (pcbddc->local_mat->symmetric_set) { 1676 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1677 } 1678 } else { 1679 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1680 1681 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1682 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1683 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1684 } 1685 if (!pcbddc->ksp_R) { /* create object if not present */ 1686 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1687 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1688 /* default */ 1689 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1690 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1691 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1692 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1693 if (issbaij) { 1694 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1695 } else { 1696 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1697 } 1698 /* Allow user's customization */ 1699 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1700 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1701 } 1702 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1703 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1704 if (!n_R) { 1705 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1706 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1707 } 1708 /* Reuse MUMPS solver if it is present */ 1709 if (sub_schurs->reuse_mumps) { 1710 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1711 1712 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1713 } 1714 /* Set Up KSP for Neumann problem of BDDC */ 1715 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1716 } 1717 /* free Neumann problem's matrix */ 1718 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1719 1720 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1721 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1722 if (pcbddc->dbg_flag) { 1723 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1724 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1725 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1726 } 1727 if (dirichlet) { /* Dirichlet */ 1728 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1729 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1730 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1731 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1732 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1733 /* need to be adapted? */ 1734 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1735 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1736 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1737 /* print info */ 1738 if (pcbddc->dbg_flag) { 1739 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1740 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1741 } 1742 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1743 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1744 } 1745 } 1746 if (neumann) { /* Neumann */ 1747 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1748 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1749 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1750 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1751 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1752 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1753 /* need to be adapted? */ 1754 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1755 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1756 /* print info */ 1757 if (pcbddc->dbg_flag) { 1758 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1759 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1760 } 1761 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1762 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1763 } 1764 } 1765 } 1766 PetscFunctionReturn(0); 1767 } 1768 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1771 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1772 { 1773 PetscErrorCode ierr; 1774 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1775 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1776 1777 PetscFunctionBegin; 1778 if (!sub_schurs->reuse_mumps) { 1779 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1780 } 1781 if (!pcbddc->switch_static) { 1782 if (applytranspose && pcbddc->local_auxmat1) { 1783 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1784 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1785 } 1786 if (!sub_schurs->reuse_mumps) { 1787 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1788 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1789 } else { 1790 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1791 1792 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1794 } 1795 } else { 1796 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1797 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1798 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1799 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1800 if (applytranspose && pcbddc->local_auxmat1) { 1801 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1802 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1803 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1804 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1805 } 1806 } 1807 if (!sub_schurs->reuse_mumps) { 1808 if (applytranspose) { 1809 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1810 } else { 1811 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1812 } 1813 #if defined(PETSC_HAVE_MUMPS) 1814 } else { 1815 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1816 1817 if (applytranspose) { 1818 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1819 } else { 1820 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1821 } 1822 #endif 1823 } 1824 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1825 if (!pcbddc->switch_static) { 1826 if (!sub_schurs->reuse_mumps) { 1827 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1828 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1829 } else { 1830 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1831 1832 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1833 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1834 } 1835 if (!applytranspose && pcbddc->local_auxmat1) { 1836 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1837 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1838 } 1839 } else { 1840 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1841 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1842 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1843 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1844 if (!applytranspose && pcbddc->local_auxmat1) { 1845 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1846 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1847 } 1848 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1849 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1850 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1851 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1852 } 1853 PetscFunctionReturn(0); 1854 } 1855 1856 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1859 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1860 { 1861 PetscErrorCode ierr; 1862 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1863 PC_IS* pcis = (PC_IS*) (pc->data); 1864 const PetscScalar zero = 0.0; 1865 1866 PetscFunctionBegin; 1867 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1868 if (applytranspose) { 1869 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1870 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1871 } else { 1872 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1873 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1874 } 1875 /* start communications from local primal nodes to rhs of coarse solver */ 1876 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1877 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1878 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1879 1880 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1881 /* TODO remove null space when doing multilevel */ 1882 if (pcbddc->coarse_ksp) { 1883 Vec rhs,sol; 1884 1885 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1886 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1887 if (applytranspose) { 1888 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1889 } else { 1890 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1891 } 1892 } 1893 1894 /* Local solution on R nodes */ 1895 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1896 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1897 } 1898 1899 /* communications from coarse sol to local primal nodes */ 1900 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1901 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1902 1903 /* Sum contributions from two levels */ 1904 if (applytranspose) { 1905 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1906 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1907 } else { 1908 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1909 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1910 } 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1916 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1917 { 1918 PetscErrorCode ierr; 1919 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1920 PetscScalar *array; 1921 Vec from,to; 1922 1923 PetscFunctionBegin; 1924 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1925 from = pcbddc->coarse_vec; 1926 to = pcbddc->vec1_P; 1927 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1928 Vec tvec; 1929 1930 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1931 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1932 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1933 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1934 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1935 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1936 } 1937 } else { /* from local to global -> put data in coarse right hand side */ 1938 from = pcbddc->vec1_P; 1939 to = pcbddc->coarse_vec; 1940 } 1941 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1942 PetscFunctionReturn(0); 1943 } 1944 1945 #undef __FUNCT__ 1946 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1947 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1948 { 1949 PetscErrorCode ierr; 1950 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1951 PetscScalar *array; 1952 Vec from,to; 1953 1954 PetscFunctionBegin; 1955 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1956 from = pcbddc->coarse_vec; 1957 to = pcbddc->vec1_P; 1958 } else { /* from local to global -> put data in coarse right hand side */ 1959 from = pcbddc->vec1_P; 1960 to = pcbddc->coarse_vec; 1961 } 1962 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1963 if (smode == SCATTER_FORWARD) { 1964 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1965 Vec tvec; 1966 1967 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1968 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1969 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1970 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1971 } 1972 } else { 1973 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1974 ierr = VecResetArray(from);CHKERRQ(ierr); 1975 } 1976 } 1977 PetscFunctionReturn(0); 1978 } 1979 1980 /* uncomment for testing purposes */ 1981 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1982 #undef __FUNCT__ 1983 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1984 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1985 { 1986 PetscErrorCode ierr; 1987 PC_IS* pcis = (PC_IS*)(pc->data); 1988 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1989 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1990 /* one and zero */ 1991 PetscScalar one=1.0,zero=0.0; 1992 /* space to store constraints and their local indices */ 1993 PetscScalar *constraints_data; 1994 PetscInt *constraints_idxs,*constraints_idxs_B; 1995 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1996 PetscInt *constraints_n; 1997 /* iterators */ 1998 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1999 /* BLAS integers */ 2000 PetscBLASInt lwork,lierr; 2001 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2002 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2003 /* reuse */ 2004 PetscInt olocal_primal_size,olocal_primal_size_cc; 2005 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2006 /* change of basis */ 2007 PetscBool qr_needed; 2008 PetscBT change_basis,qr_needed_idx; 2009 /* auxiliary stuff */ 2010 PetscInt *nnz,*is_indices; 2011 PetscInt ncc; 2012 /* some quantities */ 2013 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2014 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2015 2016 PetscFunctionBegin; 2017 /* Destroy Mat objects computed previously */ 2018 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2019 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2020 /* save info on constraints from previous setup (if any) */ 2021 olocal_primal_size = pcbddc->local_primal_size; 2022 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2023 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2024 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2025 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2026 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2027 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2028 2029 /* print some info */ 2030 if (pcbddc->dbg_flag) { 2031 IS vertices; 2032 PetscInt nv,nedges,nfaces; 2033 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2034 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2035 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2036 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2037 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2038 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2039 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2040 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2041 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2042 } 2043 2044 if (!pcbddc->adaptive_selection) { 2045 IS ISForVertices,*ISForFaces,*ISForEdges; 2046 MatNullSpace nearnullsp; 2047 const Vec *nearnullvecs; 2048 Vec *localnearnullsp; 2049 PetscScalar *array; 2050 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2051 PetscBool nnsp_has_cnst; 2052 /* LAPACK working arrays for SVD or POD */ 2053 PetscBool skip_lapack,boolforchange; 2054 PetscScalar *work; 2055 PetscReal *singular_vals; 2056 #if defined(PETSC_USE_COMPLEX) 2057 PetscReal *rwork; 2058 #endif 2059 #if defined(PETSC_MISSING_LAPACK_GESVD) 2060 PetscScalar *temp_basis,*correlation_mat; 2061 #else 2062 PetscBLASInt dummy_int=1; 2063 PetscScalar dummy_scalar=1.; 2064 #endif 2065 2066 /* Get index sets for faces, edges and vertices from graph */ 2067 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2068 /* free unneeded index sets */ 2069 if (!pcbddc->use_vertices) { 2070 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2071 } 2072 if (!pcbddc->use_edges) { 2073 for (i=0;i<n_ISForEdges;i++) { 2074 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2075 } 2076 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2077 n_ISForEdges = 0; 2078 } 2079 if (!pcbddc->use_faces) { 2080 for (i=0;i<n_ISForFaces;i++) { 2081 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2082 } 2083 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2084 n_ISForFaces = 0; 2085 } 2086 2087 #if defined(PETSC_USE_DEBUG) 2088 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2089 Also use_change_of_basis should be consistent among processors */ 2090 if (pcbddc->NullSpace) { 2091 PetscBool tbool[2],gbool[2]; 2092 2093 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2094 pcbddc->use_change_of_basis = PETSC_TRUE; 2095 if (!ISForEdges) { 2096 pcbddc->use_change_on_faces = PETSC_TRUE; 2097 } 2098 } 2099 tbool[0] = pcbddc->use_change_of_basis; 2100 tbool[1] = pcbddc->use_change_on_faces; 2101 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2102 pcbddc->use_change_of_basis = gbool[0]; 2103 pcbddc->use_change_on_faces = gbool[1]; 2104 } 2105 #endif 2106 2107 /* check if near null space is attached to global mat */ 2108 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2109 if (nearnullsp) { 2110 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2111 /* remove any stored info */ 2112 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2113 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2114 /* store information for BDDC solver reuse */ 2115 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2116 pcbddc->onearnullspace = nearnullsp; 2117 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2118 for (i=0;i<nnsp_size;i++) { 2119 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2120 } 2121 } else { /* if near null space is not provided BDDC uses constants by default */ 2122 nnsp_size = 0; 2123 nnsp_has_cnst = PETSC_TRUE; 2124 } 2125 /* get max number of constraints on a single cc */ 2126 max_constraints = nnsp_size; 2127 if (nnsp_has_cnst) max_constraints++; 2128 2129 /* 2130 Evaluate maximum storage size needed by the procedure 2131 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2132 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2133 There can be multiple constraints per connected component 2134 */ 2135 n_vertices = 0; 2136 if (ISForVertices) { 2137 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2138 } 2139 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2140 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2141 2142 total_counts = n_ISForFaces+n_ISForEdges; 2143 total_counts *= max_constraints; 2144 total_counts += n_vertices; 2145 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2146 2147 total_counts = 0; 2148 max_size_of_constraint = 0; 2149 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2150 IS used_is; 2151 if (i<n_ISForEdges) { 2152 used_is = ISForEdges[i]; 2153 } else { 2154 used_is = ISForFaces[i-n_ISForEdges]; 2155 } 2156 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2157 total_counts += j; 2158 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2159 } 2160 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2161 2162 /* get local part of global near null space vectors */ 2163 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2164 for (k=0;k<nnsp_size;k++) { 2165 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2166 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2167 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2168 } 2169 2170 /* whether or not to skip lapack calls */ 2171 skip_lapack = PETSC_TRUE; 2172 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2173 2174 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2175 if (!skip_lapack) { 2176 PetscScalar temp_work; 2177 2178 #if defined(PETSC_MISSING_LAPACK_GESVD) 2179 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2180 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2181 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2182 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2183 #if defined(PETSC_USE_COMPLEX) 2184 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2185 #endif 2186 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2187 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2188 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2189 lwork = -1; 2190 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2191 #if !defined(PETSC_USE_COMPLEX) 2192 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2193 #else 2194 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2195 #endif 2196 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2197 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2198 #else /* on missing GESVD */ 2199 /* SVD */ 2200 PetscInt max_n,min_n; 2201 max_n = max_size_of_constraint; 2202 min_n = max_constraints; 2203 if (max_size_of_constraint < max_constraints) { 2204 min_n = max_size_of_constraint; 2205 max_n = max_constraints; 2206 } 2207 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2208 #if defined(PETSC_USE_COMPLEX) 2209 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2210 #endif 2211 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2212 lwork = -1; 2213 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2214 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2215 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2216 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2217 #if !defined(PETSC_USE_COMPLEX) 2218 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2219 #else 2220 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2221 #endif 2222 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2223 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2224 #endif /* on missing GESVD */ 2225 /* Allocate optimal workspace */ 2226 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2227 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2228 } 2229 /* Now we can loop on constraining sets */ 2230 total_counts = 0; 2231 constraints_idxs_ptr[0] = 0; 2232 constraints_data_ptr[0] = 0; 2233 /* vertices */ 2234 if (n_vertices) { 2235 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2236 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2237 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2238 for (i=0;i<n_vertices;i++) { 2239 constraints_n[total_counts] = 1; 2240 constraints_data[total_counts] = 1.0; 2241 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2242 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2243 total_counts++; 2244 } 2245 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2246 PetscBool used_vertex; 2247 for (i=0;i<n_vertices;i++) { 2248 used_vertex = PETSC_FALSE; 2249 k = 0; 2250 while (!used_vertex && k<nnsp_size) { 2251 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2252 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2253 constraints_n[total_counts] = 1; 2254 constraints_idxs[total_counts] = is_indices[i]; 2255 constraints_data[total_counts] = 1.0; 2256 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2257 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2258 total_counts++; 2259 used_vertex = PETSC_TRUE; 2260 } 2261 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2262 k++; 2263 } 2264 } 2265 } 2266 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2267 n_vertices = total_counts; 2268 } 2269 2270 /* edges and faces */ 2271 total_counts_cc = total_counts; 2272 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2273 IS used_is; 2274 PetscBool idxs_copied = PETSC_FALSE; 2275 2276 if (ncc<n_ISForEdges) { 2277 used_is = ISForEdges[ncc]; 2278 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2279 } else { 2280 used_is = ISForFaces[ncc-n_ISForEdges]; 2281 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2282 } 2283 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2284 2285 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2286 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2287 /* change of basis should not be performed on local periodic nodes */ 2288 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2289 if (nnsp_has_cnst) { 2290 PetscScalar quad_value; 2291 2292 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2293 idxs_copied = PETSC_TRUE; 2294 2295 if (!pcbddc->use_nnsp_true) { 2296 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2297 } else { 2298 quad_value = 1.0; 2299 } 2300 for (j=0;j<size_of_constraint;j++) { 2301 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2302 } 2303 temp_constraints++; 2304 total_counts++; 2305 } 2306 for (k=0;k<nnsp_size;k++) { 2307 PetscReal real_value; 2308 PetscScalar *ptr_to_data; 2309 2310 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2311 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2312 for (j=0;j<size_of_constraint;j++) { 2313 ptr_to_data[j] = array[is_indices[j]]; 2314 } 2315 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2316 /* check if array is null on the connected component */ 2317 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2318 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2319 if (real_value > 0.0) { /* keep indices and values */ 2320 temp_constraints++; 2321 total_counts++; 2322 if (!idxs_copied) { 2323 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2324 idxs_copied = PETSC_TRUE; 2325 } 2326 } 2327 } 2328 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2329 valid_constraints = temp_constraints; 2330 if (!pcbddc->use_nnsp_true && temp_constraints) { 2331 if (temp_constraints == 1) { /* just normalize the constraint */ 2332 PetscScalar norm,*ptr_to_data; 2333 2334 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2335 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2336 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2337 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2338 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2339 } else { /* perform SVD */ 2340 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2341 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2342 2343 #if defined(PETSC_MISSING_LAPACK_GESVD) 2344 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2345 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2346 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2347 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2348 from that computed using LAPACKgesvd 2349 -> This is due to a different computation of eigenvectors in LAPACKheev 2350 -> The quality of the POD-computed basis will be the same */ 2351 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2352 /* Store upper triangular part of correlation matrix */ 2353 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2354 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2355 for (j=0;j<temp_constraints;j++) { 2356 for (k=0;k<j+1;k++) { 2357 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2358 } 2359 } 2360 /* compute eigenvalues and eigenvectors of correlation matrix */ 2361 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2362 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2363 #if !defined(PETSC_USE_COMPLEX) 2364 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2365 #else 2366 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2367 #endif 2368 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2369 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2370 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2371 j = 0; 2372 while (j < temp_constraints && singular_vals[j] < tol) j++; 2373 total_counts = total_counts-j; 2374 valid_constraints = temp_constraints-j; 2375 /* scale and copy POD basis into used quadrature memory */ 2376 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2377 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2378 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2379 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2380 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2381 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2382 if (j<temp_constraints) { 2383 PetscInt ii; 2384 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2385 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2386 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2387 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2388 for (k=0;k<temp_constraints-j;k++) { 2389 for (ii=0;ii<size_of_constraint;ii++) { 2390 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2391 } 2392 } 2393 } 2394 #else /* on missing GESVD */ 2395 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2396 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2397 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2398 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2399 #if !defined(PETSC_USE_COMPLEX) 2400 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2401 #else 2402 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2403 #endif 2404 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2405 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2406 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2407 k = temp_constraints; 2408 if (k > size_of_constraint) k = size_of_constraint; 2409 j = 0; 2410 while (j < k && singular_vals[k-j-1] < tol) j++; 2411 valid_constraints = k-j; 2412 total_counts = total_counts-temp_constraints+valid_constraints; 2413 #endif /* on missing GESVD */ 2414 } 2415 } 2416 /* update pointers information */ 2417 if (valid_constraints) { 2418 constraints_n[total_counts_cc] = valid_constraints; 2419 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2420 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2421 /* set change_of_basis flag */ 2422 if (boolforchange) { 2423 PetscBTSet(change_basis,total_counts_cc); 2424 } 2425 total_counts_cc++; 2426 } 2427 } 2428 /* free workspace */ 2429 if (!skip_lapack) { 2430 ierr = PetscFree(work);CHKERRQ(ierr); 2431 #if defined(PETSC_USE_COMPLEX) 2432 ierr = PetscFree(rwork);CHKERRQ(ierr); 2433 #endif 2434 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2435 #if defined(PETSC_MISSING_LAPACK_GESVD) 2436 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2437 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2438 #endif 2439 } 2440 for (k=0;k<nnsp_size;k++) { 2441 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2442 } 2443 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2444 /* free index sets of faces, edges and vertices */ 2445 for (i=0;i<n_ISForFaces;i++) { 2446 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2447 } 2448 if (n_ISForFaces) { 2449 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2450 } 2451 for (i=0;i<n_ISForEdges;i++) { 2452 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2453 } 2454 if (n_ISForEdges) { 2455 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2456 } 2457 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2458 } else { 2459 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2460 2461 total_counts = 0; 2462 n_vertices = 0; 2463 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2464 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2465 } 2466 max_constraints = 0; 2467 total_counts_cc = 0; 2468 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2469 total_counts += pcbddc->adaptive_constraints_n[i]; 2470 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2471 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2472 } 2473 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2474 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2475 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2476 constraints_data = pcbddc->adaptive_constraints_data; 2477 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2478 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2479 total_counts_cc = 0; 2480 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2481 if (pcbddc->adaptive_constraints_n[i]) { 2482 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2483 } 2484 } 2485 #if 0 2486 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2487 for (i=0;i<total_counts_cc;i++) { 2488 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2489 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2490 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2491 printf(" %d",constraints_idxs[j]); 2492 } 2493 printf("\n"); 2494 printf("number of cc: %d\n",constraints_n[i]); 2495 } 2496 for (i=0;i<n_vertices;i++) { 2497 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2498 } 2499 for (i=0;i<sub_schurs->n_subs;i++) { 2500 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2501 } 2502 #endif 2503 2504 max_size_of_constraint = 0; 2505 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2506 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2507 /* Change of basis */ 2508 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2509 if (pcbddc->use_change_of_basis) { 2510 for (i=0;i<sub_schurs->n_subs;i++) { 2511 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2512 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2513 } 2514 } 2515 } 2516 } 2517 pcbddc->local_primal_size = total_counts; 2518 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2519 2520 /* map constraints_idxs in boundary numbering */ 2521 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2522 if (i != constraints_idxs_ptr[total_counts_cc]) { 2523 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2524 } 2525 2526 /* Create constraint matrix */ 2527 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2528 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2529 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2530 2531 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2532 /* determine if a QR strategy is needed for change of basis */ 2533 qr_needed = PETSC_FALSE; 2534 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2535 total_primal_vertices=0; 2536 pcbddc->local_primal_size_cc = 0; 2537 for (i=0;i<total_counts_cc;i++) { 2538 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2539 if (size_of_constraint == 1) { 2540 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2541 pcbddc->local_primal_size_cc += 1; 2542 } else if (PetscBTLookup(change_basis,i)) { 2543 for (k=0;k<constraints_n[i];k++) { 2544 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2545 } 2546 pcbddc->local_primal_size_cc += constraints_n[i]; 2547 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2548 PetscBTSet(qr_needed_idx,i); 2549 qr_needed = PETSC_TRUE; 2550 } 2551 } else { 2552 pcbddc->local_primal_size_cc += 1; 2553 } 2554 } 2555 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2556 pcbddc->n_vertices = total_primal_vertices; 2557 /* permute indices in order to have a sorted set of vertices */ 2558 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2559 2560 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2561 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2562 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2563 2564 /* nonzero structure of constraint matrix */ 2565 /* and get reference dof for local constraints */ 2566 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2567 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2568 2569 j = total_primal_vertices; 2570 total_counts = total_primal_vertices; 2571 cum = total_primal_vertices; 2572 for (i=n_vertices;i<total_counts_cc;i++) { 2573 if (!PetscBTLookup(change_basis,i)) { 2574 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2575 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2576 cum++; 2577 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2578 for (k=0;k<constraints_n[i];k++) { 2579 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2580 nnz[j+k] = size_of_constraint; 2581 } 2582 j += constraints_n[i]; 2583 } 2584 } 2585 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2586 ierr = PetscFree(nnz);CHKERRQ(ierr); 2587 2588 /* set values in constraint matrix */ 2589 for (i=0;i<total_primal_vertices;i++) { 2590 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2591 } 2592 total_counts = total_primal_vertices; 2593 for (i=n_vertices;i<total_counts_cc;i++) { 2594 if (!PetscBTLookup(change_basis,i)) { 2595 PetscInt *cols; 2596 2597 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2598 cols = constraints_idxs+constraints_idxs_ptr[i]; 2599 for (k=0;k<constraints_n[i];k++) { 2600 PetscInt row = total_counts+k; 2601 PetscScalar *vals; 2602 2603 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2604 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2605 } 2606 total_counts += constraints_n[i]; 2607 } 2608 } 2609 /* assembling */ 2610 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2611 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2612 2613 /* 2614 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2615 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2616 */ 2617 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2618 if (pcbddc->use_change_of_basis) { 2619 /* dual and primal dofs on a single cc */ 2620 PetscInt dual_dofs,primal_dofs; 2621 /* working stuff for GEQRF */ 2622 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2623 PetscBLASInt lqr_work; 2624 /* working stuff for UNGQR */ 2625 PetscScalar *gqr_work,lgqr_work_t; 2626 PetscBLASInt lgqr_work; 2627 /* working stuff for TRTRS */ 2628 PetscScalar *trs_rhs; 2629 PetscBLASInt Blas_NRHS; 2630 /* pointers for values insertion into change of basis matrix */ 2631 PetscInt *start_rows,*start_cols; 2632 PetscScalar *start_vals; 2633 /* working stuff for values insertion */ 2634 PetscBT is_primal; 2635 PetscInt *aux_primal_numbering_B; 2636 /* matrix sizes */ 2637 PetscInt global_size,local_size; 2638 /* temporary change of basis */ 2639 Mat localChangeOfBasisMatrix; 2640 /* extra space for debugging */ 2641 PetscScalar *dbg_work; 2642 2643 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2644 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2645 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2646 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2647 /* nonzeros for local mat */ 2648 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2649 for (i=0;i<pcis->n;i++) nnz[i]=1; 2650 for (i=n_vertices;i<total_counts_cc;i++) { 2651 if (PetscBTLookup(change_basis,i)) { 2652 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2653 if (PetscBTLookup(qr_needed_idx,i)) { 2654 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2655 } else { 2656 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2657 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2658 } 2659 } 2660 } 2661 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2662 ierr = PetscFree(nnz);CHKERRQ(ierr); 2663 /* Set initial identity in the matrix */ 2664 for (i=0;i<pcis->n;i++) { 2665 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2666 } 2667 2668 if (pcbddc->dbg_flag) { 2669 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2671 } 2672 2673 2674 /* Now we loop on the constraints which need a change of basis */ 2675 /* 2676 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2677 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2678 2679 Basic blocks of change of basis matrix T computed by 2680 2681 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2682 2683 | 1 0 ... 0 s_1/S | 2684 | 0 1 ... 0 s_2/S | 2685 | ... | 2686 | 0 ... 1 s_{n-1}/S | 2687 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2688 2689 with S = \sum_{i=1}^n s_i^2 2690 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2691 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2692 2693 - QR decomposition of constraints otherwise 2694 */ 2695 if (qr_needed) { 2696 /* space to store Q */ 2697 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2698 /* first we issue queries for optimal work */ 2699 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2700 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2701 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2702 lqr_work = -1; 2703 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2704 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2705 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2706 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2707 lgqr_work = -1; 2708 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2709 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2710 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2711 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2712 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2713 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2714 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2715 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2716 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2717 /* array to store scaling factors for reflectors */ 2718 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2719 /* array to store rhs and solution of triangular solver */ 2720 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2721 /* allocating workspace for check */ 2722 if (pcbddc->dbg_flag) { 2723 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2724 } 2725 } 2726 /* array to store whether a node is primal or not */ 2727 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2728 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2729 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2730 if (i != total_primal_vertices) { 2731 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2732 } 2733 for (i=0;i<total_primal_vertices;i++) { 2734 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2735 } 2736 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2737 2738 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2739 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2740 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2741 if (PetscBTLookup(change_basis,total_counts)) { 2742 /* get constraint info */ 2743 primal_dofs = constraints_n[total_counts]; 2744 dual_dofs = size_of_constraint-primal_dofs; 2745 2746 if (pcbddc->dbg_flag) { 2747 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2748 } 2749 2750 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2751 2752 /* copy quadrature constraints for change of basis check */ 2753 if (pcbddc->dbg_flag) { 2754 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2755 } 2756 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2757 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2758 2759 /* compute QR decomposition of constraints */ 2760 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2761 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2762 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2763 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2764 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2765 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2766 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2767 2768 /* explictly compute R^-T */ 2769 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2770 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2771 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2772 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2773 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2774 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2775 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2776 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2777 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2778 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2779 2780 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2781 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2782 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2783 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2784 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2785 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2786 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2787 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2788 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2789 2790 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2791 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2792 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2793 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2794 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2795 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2796 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2797 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2798 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2799 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2800 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2801 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2802 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2803 2804 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2805 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2806 /* insert cols for primal dofs */ 2807 for (j=0;j<primal_dofs;j++) { 2808 start_vals = &qr_basis[j*size_of_constraint]; 2809 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2810 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2811 } 2812 /* insert cols for dual dofs */ 2813 for (j=0,k=0;j<dual_dofs;k++) { 2814 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2815 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2816 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2817 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2818 j++; 2819 } 2820 } 2821 2822 /* check change of basis */ 2823 if (pcbddc->dbg_flag) { 2824 PetscInt ii,jj; 2825 PetscBool valid_qr=PETSC_TRUE; 2826 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2827 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2828 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2829 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2830 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2831 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2832 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2833 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2834 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2835 for (jj=0;jj<size_of_constraint;jj++) { 2836 for (ii=0;ii<primal_dofs;ii++) { 2837 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2838 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2839 } 2840 } 2841 if (!valid_qr) { 2842 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2843 for (jj=0;jj<size_of_constraint;jj++) { 2844 for (ii=0;ii<primal_dofs;ii++) { 2845 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2846 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2847 } 2848 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2849 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2850 } 2851 } 2852 } 2853 } else { 2854 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2855 } 2856 } 2857 } else { /* simple transformation block */ 2858 PetscInt row,col; 2859 PetscScalar val,norm; 2860 2861 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2862 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2863 for (j=0;j<size_of_constraint;j++) { 2864 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2865 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2866 if (!PetscBTLookup(is_primal,row_B)) { 2867 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2868 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2869 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2870 } else { 2871 for (k=0;k<size_of_constraint;k++) { 2872 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2873 if (row != col) { 2874 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2875 } else { 2876 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2877 } 2878 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2879 } 2880 } 2881 } 2882 if (pcbddc->dbg_flag) { 2883 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2884 } 2885 } 2886 } else { 2887 if (pcbddc->dbg_flag) { 2888 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2889 } 2890 } 2891 } 2892 2893 /* free workspace */ 2894 if (qr_needed) { 2895 if (pcbddc->dbg_flag) { 2896 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2897 } 2898 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2899 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2900 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2901 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2902 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2903 } 2904 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2905 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2906 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2907 2908 /* assembling of global change of variable */ 2909 { 2910 Mat tmat; 2911 PetscInt bs; 2912 2913 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2914 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2915 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2916 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2917 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2918 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2919 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2920 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2921 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2922 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2923 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2924 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2925 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2926 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2927 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2928 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2929 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2930 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2931 } 2932 /* check */ 2933 if (pcbddc->dbg_flag) { 2934 PetscReal error; 2935 Vec x,x_change; 2936 2937 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2938 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2939 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2940 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2941 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2942 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2943 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2944 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2945 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2946 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2947 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2948 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2949 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2950 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2951 ierr = VecDestroy(&x);CHKERRQ(ierr); 2952 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2953 } 2954 2955 /* adapt sub_schurs computed (if any) */ 2956 if (pcbddc->use_deluxe_scaling) { 2957 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2958 if (sub_schurs->S_Ej_all) { 2959 Mat S_new,tmat; 2960 IS is_all_N; 2961 2962 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2963 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2964 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2965 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2966 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2967 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2968 sub_schurs->S_Ej_all = S_new; 2969 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2970 if (sub_schurs->sum_S_Ej_all) { 2971 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2972 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2973 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2974 sub_schurs->sum_S_Ej_all = S_new; 2975 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2976 } 2977 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2978 } 2979 } 2980 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2981 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2982 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2983 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2984 } 2985 2986 /* set up change of basis context */ 2987 if (pcbddc->ChangeOfBasisMatrix) { 2988 PCBDDCChange_ctx change_ctx; 2989 2990 if (!pcbddc->new_global_mat) { 2991 PetscInt global_size,local_size; 2992 2993 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2994 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2995 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2996 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2997 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2998 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2999 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3000 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3001 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3002 } else { 3003 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3004 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3005 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3006 } 3007 if (!pcbddc->user_ChangeOfBasisMatrix) { 3008 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3009 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3010 } else { 3011 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3012 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3013 } 3014 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3015 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3016 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3017 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3018 } 3019 3020 /* check if a new primal space has been introduced */ 3021 pcbddc->new_primal_space_local = PETSC_TRUE; 3022 if (olocal_primal_size == pcbddc->local_primal_size) { 3023 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3024 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3025 if (!pcbddc->new_primal_space_local) { 3026 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3027 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3028 } 3029 } 3030 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3031 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3032 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3033 3034 /* flush dbg viewer */ 3035 if (pcbddc->dbg_flag) { 3036 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3037 } 3038 3039 /* free workspace */ 3040 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3041 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3042 if (!pcbddc->adaptive_selection) { 3043 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3044 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3045 } else { 3046 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3047 pcbddc->adaptive_constraints_idxs_ptr, 3048 pcbddc->adaptive_constraints_data_ptr, 3049 pcbddc->adaptive_constraints_idxs, 3050 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3051 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3052 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3053 } 3054 PetscFunctionReturn(0); 3055 } 3056 3057 #undef __FUNCT__ 3058 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3059 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3060 { 3061 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3062 PC_IS *pcis = (PC_IS*)pc->data; 3063 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3064 PetscInt ierr,i,vertex_size,N; 3065 PetscViewer viewer=pcbddc->dbg_viewer; 3066 3067 PetscFunctionBegin; 3068 /* Reset previously computed graph */ 3069 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3070 /* Init local Graph struct */ 3071 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3072 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3073 3074 /* Check validity of the csr graph passed in by the user */ 3075 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3076 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3077 } 3078 3079 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3080 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3081 PetscInt *xadj,*adjncy; 3082 PetscInt nvtxs; 3083 PetscBool flg_row=PETSC_FALSE; 3084 3085 if (pcbddc->use_local_adj) { 3086 3087 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3088 if (flg_row) { 3089 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3090 pcbddc->computed_rowadj = PETSC_TRUE; 3091 } 3092 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3093 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3094 IS is_dummy; 3095 ISLocalToGlobalMapping l2gmap_dummy; 3096 PetscInt j,sum; 3097 PetscInt *cxadj,*cadjncy; 3098 const PetscInt *idxs; 3099 PCBDDCGraph graph; 3100 PetscBT is_on_boundary; 3101 3102 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3103 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3104 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3105 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3106 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3107 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3108 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3109 if (flg_row) { 3110 graph->xadj = xadj; 3111 graph->adjncy = adjncy; 3112 } 3113 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3114 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3115 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3116 3117 if (pcbddc->dbg_flag) { 3118 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3119 for (i=0;i<graph->ncc;i++) { 3120 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3121 } 3122 } 3123 3124 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3125 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3126 for (i=0;i<pcis->n_B;i++) { 3127 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3128 } 3129 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3130 3131 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3132 sum = 0; 3133 for (i=0;i<graph->ncc;i++) { 3134 PetscInt sizecc = 0; 3135 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3136 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3137 sizecc++; 3138 } 3139 } 3140 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3141 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3142 cxadj[graph->queue[j]] = sizecc; 3143 } 3144 } 3145 sum += sizecc*sizecc; 3146 } 3147 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3148 sum = 0; 3149 for (i=0;i<pcis->n;i++) { 3150 PetscInt temp = cxadj[i]; 3151 cxadj[i] = sum; 3152 sum += temp; 3153 } 3154 cxadj[pcis->n] = sum; 3155 for (i=0;i<graph->ncc;i++) { 3156 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3157 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3158 PetscInt k,sizecc = 0; 3159 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3160 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3161 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3162 sizecc++; 3163 } 3164 } 3165 } 3166 } 3167 } 3168 if (sum) { 3169 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3170 } else { 3171 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3172 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3173 } 3174 graph->xadj = 0; 3175 graph->adjncy = 0; 3176 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3177 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3178 } 3179 } 3180 if (pcbddc->dbg_flag) { 3181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3182 } 3183 3184 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3185 vertex_size = 1; 3186 if (pcbddc->user_provided_isfordofs) { 3187 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3188 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3189 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3190 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3191 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3192 } 3193 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3194 pcbddc->n_ISForDofs = 0; 3195 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3196 } 3197 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3198 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3199 } else { 3200 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3201 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3202 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3203 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3204 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3205 } 3206 } 3207 } 3208 3209 /* Setup of Graph */ 3210 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3211 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3212 } 3213 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3214 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3215 } 3216 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3217 3218 /* Graph's connected components analysis */ 3219 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3220 3221 /* print some info to stdout */ 3222 if (pcbddc->dbg_flag) { 3223 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3224 } 3225 3226 /* mark topography has done */ 3227 pcbddc->recompute_topography = PETSC_FALSE; 3228 PetscFunctionReturn(0); 3229 } 3230 3231 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3232 #undef __FUNCT__ 3233 #define __FUNCT__ "PCBDDCSubsetNumbering" 3234 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3235 { 3236 PetscSF sf; 3237 PetscLayout map; 3238 const PetscInt *idxs; 3239 PetscInt *leaf_data,*root_data,*gidxs; 3240 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3241 PetscInt n_n,nlocals,start,first_index; 3242 PetscMPIInt commsize; 3243 PetscBool first_found; 3244 PetscErrorCode ierr; 3245 3246 PetscFunctionBegin; 3247 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3248 if (subset_mult) { 3249 PetscCheckSameComm(subset,1,subset_mult,2); 3250 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3251 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3252 } 3253 /* create workspace layout for computing global indices of subset */ 3254 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3255 lbounds[0] = lbounds[1] = 0; 3256 for (i=0;i<n;i++) { 3257 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3258 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3259 } 3260 lbounds[0] = -lbounds[0]; 3261 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3262 gbounds[0] = -gbounds[0]; 3263 N = gbounds[1] - gbounds[0] + 1; 3264 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3265 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3266 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3267 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3268 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3269 3270 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3271 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3272 if (subset_mult) { 3273 const PetscInt* idxs_mult; 3274 3275 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3276 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3277 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3278 } else { 3279 for (i=0;i<n;i++) leaf_data[i] = 1; 3280 } 3281 /* local size of new subset */ 3282 n_n = 0; 3283 for (i=0;i<n;i++) n_n += leaf_data[i]; 3284 3285 /* global indexes in layout */ 3286 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3287 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3288 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3289 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3290 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3291 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3292 3293 /* reduce from leaves to roots */ 3294 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3295 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3296 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3297 3298 /* count indexes in local part of layout */ 3299 nlocals = 0; 3300 first_index = -1; 3301 first_found = PETSC_FALSE; 3302 for (i=0;i<Nl;i++) { 3303 if (!first_found && root_data[i]) { 3304 first_found = PETSC_TRUE; 3305 first_index = i; 3306 } 3307 nlocals += root_data[i]; 3308 } 3309 3310 /* cumulative of number of indexes and size of subset without holes */ 3311 #if defined(PETSC_HAVE_MPI_EXSCAN) 3312 start = 0; 3313 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3314 #else 3315 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3316 start = start-nlocals; 3317 #endif 3318 3319 if (N_n) { /* compute total size of new subset if requested */ 3320 *N_n = start + nlocals; 3321 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3322 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3323 } 3324 3325 /* adapt root data with cumulative */ 3326 if (first_found) { 3327 PetscInt old_index; 3328 3329 root_data[first_index] += start; 3330 old_index = first_index; 3331 for (i=first_index+1;i<Nl;i++) { 3332 if (root_data[i]) { 3333 root_data[i] += root_data[old_index]; 3334 old_index = i; 3335 } 3336 } 3337 } 3338 3339 /* from roots to leaves */ 3340 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3341 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3342 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3343 3344 /* create new IS with global indexes without holes */ 3345 if (subset_mult) { 3346 const PetscInt* idxs_mult; 3347 PetscInt cum; 3348 3349 cum = 0; 3350 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3351 for (i=0;i<n;i++) { 3352 PetscInt j; 3353 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3354 } 3355 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3356 } else { 3357 for (i=0;i<n;i++) { 3358 gidxs[i] = leaf_data[i]-1; 3359 } 3360 } 3361 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3362 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3363 PetscFunctionReturn(0); 3364 } 3365 3366 #undef __FUNCT__ 3367 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3368 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3369 { 3370 PetscInt i,j; 3371 PetscScalar *alphas; 3372 PetscErrorCode ierr; 3373 3374 PetscFunctionBegin; 3375 /* this implements stabilized Gram-Schmidt */ 3376 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3377 for (i=0;i<n;i++) { 3378 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3379 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3380 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3381 } 3382 ierr = PetscFree(alphas);CHKERRQ(ierr); 3383 PetscFunctionReturn(0); 3384 } 3385 3386 #undef __FUNCT__ 3387 #define __FUNCT__ "MatISGetSubassemblingPattern" 3388 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3389 { 3390 IS ranks_send_to; 3391 PetscInt n_neighs,*neighs,*n_shared,**shared; 3392 PetscMPIInt size,rank,color; 3393 PetscInt *xadj,*adjncy; 3394 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3395 PetscInt i,local_size,threshold=0; 3396 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3397 PetscSubcomm subcomm; 3398 PetscErrorCode ierr; 3399 3400 PetscFunctionBegin; 3401 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3402 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3403 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3404 3405 /* Get info on mapping */ 3406 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3407 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3408 3409 /* build local CSR graph of subdomains' connectivity */ 3410 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3411 xadj[0] = 0; 3412 xadj[1] = PetscMax(n_neighs-1,0); 3413 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3414 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3415 3416 if (threshold) { 3417 PetscInt xadj_count = 0; 3418 for (i=1;i<n_neighs;i++) { 3419 if (n_shared[i] > threshold) { 3420 adjncy[xadj_count] = neighs[i]; 3421 adjncy_wgt[xadj_count] = n_shared[i]; 3422 xadj_count++; 3423 } 3424 } 3425 xadj[1] = xadj_count; 3426 } else { 3427 if (xadj[1]) { 3428 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3429 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3430 } 3431 } 3432 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3433 if (use_square) { 3434 for (i=0;i<xadj[1];i++) { 3435 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3436 } 3437 } 3438 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3439 3440 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3441 3442 /* 3443 Restrict work on active processes only. 3444 */ 3445 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3446 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3447 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3448 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3449 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3450 if (color) { 3451 ierr = PetscFree(xadj);CHKERRQ(ierr); 3452 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3453 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3454 } else { 3455 Mat subdomain_adj; 3456 IS new_ranks,new_ranks_contig; 3457 MatPartitioning partitioner; 3458 PetscInt prank,rstart=0,rend=0; 3459 PetscInt *is_indices,*oldranks; 3460 PetscBool aggregate; 3461 3462 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3463 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3464 prank = rank; 3465 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3466 /* 3467 for (i=0;i<size;i++) { 3468 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3469 } 3470 */ 3471 for (i=0;i<xadj[1];i++) { 3472 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3473 } 3474 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3475 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3476 if (aggregate) { 3477 PetscInt lrows,row,ncols,*cols; 3478 PetscMPIInt nrank; 3479 PetscScalar *vals; 3480 3481 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3482 lrows = 0; 3483 if (nrank<redprocs) { 3484 lrows = size/redprocs; 3485 if (nrank<size%redprocs) lrows++; 3486 } 3487 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3488 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3489 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3490 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3491 row = nrank; 3492 ncols = xadj[1]-xadj[0]; 3493 cols = adjncy; 3494 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3495 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3496 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3497 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3498 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3499 ierr = PetscFree(xadj);CHKERRQ(ierr); 3500 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3501 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3502 ierr = PetscFree(vals);CHKERRQ(ierr); 3503 } else { 3504 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3505 } 3506 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3507 3508 /* Partition */ 3509 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3510 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3511 if (use_vwgt) { 3512 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3513 v_wgt[0] = local_size; 3514 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3515 } 3516 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3517 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3518 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3519 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3520 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3521 3522 /* renumber new_ranks to avoid "holes" in new set of processors */ 3523 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3524 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3525 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3526 if (!redprocs) { 3527 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3528 } else { 3529 PetscInt idxs[1]; 3530 PetscMPIInt tag; 3531 MPI_Request *reqs; 3532 3533 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3534 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3535 for (i=rstart;i<rend;i++) { 3536 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3537 } 3538 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3539 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3540 ierr = PetscFree(reqs);CHKERRQ(ierr); 3541 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3542 } 3543 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3544 /* clean up */ 3545 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3546 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3547 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3548 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3549 } 3550 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3551 3552 /* assemble parallel IS for sends */ 3553 i = 1; 3554 if (color) i=0; 3555 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3556 /* get back IS */ 3557 *is_sends = ranks_send_to; 3558 PetscFunctionReturn(0); 3559 } 3560 3561 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3562 3563 #undef __FUNCT__ 3564 #define __FUNCT__ "MatISSubassemble" 3565 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3566 { 3567 Mat local_mat; 3568 IS is_sends_internal; 3569 PetscInt rows,cols,new_local_rows; 3570 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3571 PetscBool ismatis,isdense,newisdense,destroy_mat; 3572 ISLocalToGlobalMapping l2gmap; 3573 PetscInt* l2gmap_indices; 3574 const PetscInt* is_indices; 3575 MatType new_local_type; 3576 /* buffers */ 3577 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3578 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3579 PetscInt *recv_buffer_idxs_local; 3580 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3581 /* MPI */ 3582 MPI_Comm comm,comm_n; 3583 PetscSubcomm subcomm; 3584 PetscMPIInt n_sends,n_recvs,commsize; 3585 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3586 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3587 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3588 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3589 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3590 PetscErrorCode ierr; 3591 3592 PetscFunctionBegin; 3593 /* TODO: add missing checks */ 3594 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3595 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3596 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3597 PetscValidLogicalCollectiveInt(mat,nis,7); 3598 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3599 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3600 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3601 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3602 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3603 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3604 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3605 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3606 PetscInt mrows,mcols,mnrows,mncols; 3607 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3608 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3609 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3610 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3611 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3612 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3613 } 3614 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3615 PetscValidLogicalCollectiveInt(mat,bs,0); 3616 /* prepare IS for sending if not provided */ 3617 if (!is_sends) { 3618 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3619 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3620 } else { 3621 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3622 is_sends_internal = is_sends; 3623 } 3624 3625 /* get comm */ 3626 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3627 3628 /* compute number of sends */ 3629 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3630 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3631 3632 /* compute number of receives */ 3633 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3634 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3635 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3636 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3637 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3638 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3639 ierr = PetscFree(iflags);CHKERRQ(ierr); 3640 3641 /* restrict comm if requested */ 3642 subcomm = 0; 3643 destroy_mat = PETSC_FALSE; 3644 if (restrict_comm) { 3645 PetscMPIInt color,subcommsize; 3646 3647 color = 0; 3648 if (restrict_full) { 3649 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3650 } else { 3651 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3652 } 3653 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3654 subcommsize = commsize - subcommsize; 3655 /* check if reuse has been requested */ 3656 if (reuse == MAT_REUSE_MATRIX) { 3657 if (*mat_n) { 3658 PetscMPIInt subcommsize2; 3659 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3660 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3661 comm_n = PetscObjectComm((PetscObject)*mat_n); 3662 } else { 3663 comm_n = PETSC_COMM_SELF; 3664 } 3665 } else { /* MAT_INITIAL_MATRIX */ 3666 PetscMPIInt rank; 3667 3668 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3669 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3670 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3671 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3672 comm_n = PetscSubcommChild(subcomm); 3673 } 3674 /* flag to destroy *mat_n if not significative */ 3675 if (color) destroy_mat = PETSC_TRUE; 3676 } else { 3677 comm_n = comm; 3678 } 3679 3680 /* prepare send/receive buffers */ 3681 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3682 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3683 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3684 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3685 if (nis) { 3686 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3687 } 3688 3689 /* Get data from local matrices */ 3690 if (!isdense) { 3691 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3692 /* TODO: See below some guidelines on how to prepare the local buffers */ 3693 /* 3694 send_buffer_vals should contain the raw values of the local matrix 3695 send_buffer_idxs should contain: 3696 - MatType_PRIVATE type 3697 - PetscInt size_of_l2gmap 3698 - PetscInt global_row_indices[size_of_l2gmap] 3699 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3700 */ 3701 } else { 3702 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3703 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3704 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3705 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3706 send_buffer_idxs[1] = i; 3707 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3708 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3709 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3710 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3711 for (i=0;i<n_sends;i++) { 3712 ilengths_vals[is_indices[i]] = len*len; 3713 ilengths_idxs[is_indices[i]] = len+2; 3714 } 3715 } 3716 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3717 /* additional is (if any) */ 3718 if (nis) { 3719 PetscMPIInt psum; 3720 PetscInt j; 3721 for (j=0,psum=0;j<nis;j++) { 3722 PetscInt plen; 3723 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3724 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3725 psum += len+1; /* indices + lenght */ 3726 } 3727 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3728 for (j=0,psum=0;j<nis;j++) { 3729 PetscInt plen; 3730 const PetscInt *is_array_idxs; 3731 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3732 send_buffer_idxs_is[psum] = plen; 3733 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3734 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3735 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3736 psum += plen+1; /* indices + lenght */ 3737 } 3738 for (i=0;i<n_sends;i++) { 3739 ilengths_idxs_is[is_indices[i]] = psum; 3740 } 3741 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3742 } 3743 3744 buf_size_idxs = 0; 3745 buf_size_vals = 0; 3746 buf_size_idxs_is = 0; 3747 for (i=0;i<n_recvs;i++) { 3748 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3749 buf_size_vals += (PetscInt)olengths_vals[i]; 3750 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3751 } 3752 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3753 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3754 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3755 3756 /* get new tags for clean communications */ 3757 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3758 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3759 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3760 3761 /* allocate for requests */ 3762 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3763 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3764 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3765 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3766 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3767 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3768 3769 /* communications */ 3770 ptr_idxs = recv_buffer_idxs; 3771 ptr_vals = recv_buffer_vals; 3772 ptr_idxs_is = recv_buffer_idxs_is; 3773 for (i=0;i<n_recvs;i++) { 3774 source_dest = onodes[i]; 3775 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3776 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3777 ptr_idxs += olengths_idxs[i]; 3778 ptr_vals += olengths_vals[i]; 3779 if (nis) { 3780 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3781 ptr_idxs_is += olengths_idxs_is[i]; 3782 } 3783 } 3784 for (i=0;i<n_sends;i++) { 3785 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3786 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3787 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3788 if (nis) { 3789 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3790 } 3791 } 3792 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3793 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3794 3795 /* assemble new l2g map */ 3796 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3797 ptr_idxs = recv_buffer_idxs; 3798 new_local_rows = 0; 3799 for (i=0;i<n_recvs;i++) { 3800 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3801 ptr_idxs += olengths_idxs[i]; 3802 } 3803 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3804 ptr_idxs = recv_buffer_idxs; 3805 new_local_rows = 0; 3806 for (i=0;i<n_recvs;i++) { 3807 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3808 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3809 ptr_idxs += olengths_idxs[i]; 3810 } 3811 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3812 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3813 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3814 3815 /* infer new local matrix type from received local matrices type */ 3816 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3817 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3818 if (n_recvs) { 3819 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3820 ptr_idxs = recv_buffer_idxs; 3821 for (i=0;i<n_recvs;i++) { 3822 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3823 new_local_type_private = MATAIJ_PRIVATE; 3824 break; 3825 } 3826 ptr_idxs += olengths_idxs[i]; 3827 } 3828 switch (new_local_type_private) { 3829 case MATDENSE_PRIVATE: 3830 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3831 new_local_type = MATSEQAIJ; 3832 bs = 1; 3833 } else { /* if I receive only 1 dense matrix */ 3834 new_local_type = MATSEQDENSE; 3835 bs = 1; 3836 } 3837 break; 3838 case MATAIJ_PRIVATE: 3839 new_local_type = MATSEQAIJ; 3840 bs = 1; 3841 break; 3842 case MATBAIJ_PRIVATE: 3843 new_local_type = MATSEQBAIJ; 3844 break; 3845 case MATSBAIJ_PRIVATE: 3846 new_local_type = MATSEQSBAIJ; 3847 break; 3848 default: 3849 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3850 break; 3851 } 3852 } else { /* by default, new_local_type is seqdense */ 3853 new_local_type = MATSEQDENSE; 3854 bs = 1; 3855 } 3856 3857 /* create MATIS object if needed */ 3858 if (reuse == MAT_INITIAL_MATRIX) { 3859 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3860 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3861 } else { 3862 /* it also destroys the local matrices */ 3863 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3864 } 3865 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3866 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3867 3868 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3869 3870 /* Global to local map of received indices */ 3871 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3872 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3873 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3874 3875 /* restore attributes -> type of incoming data and its size */ 3876 buf_size_idxs = 0; 3877 for (i=0;i<n_recvs;i++) { 3878 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3879 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3880 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3881 } 3882 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3883 3884 /* set preallocation */ 3885 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3886 if (!newisdense) { 3887 PetscInt *new_local_nnz=0; 3888 3889 ptr_vals = recv_buffer_vals; 3890 ptr_idxs = recv_buffer_idxs_local; 3891 if (n_recvs) { 3892 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3893 } 3894 for (i=0;i<n_recvs;i++) { 3895 PetscInt j; 3896 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3897 for (j=0;j<*(ptr_idxs+1);j++) { 3898 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3899 } 3900 } else { 3901 /* TODO */ 3902 } 3903 ptr_idxs += olengths_idxs[i]; 3904 } 3905 if (new_local_nnz) { 3906 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3907 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3908 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3909 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3910 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3911 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3912 } else { 3913 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3914 } 3915 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3916 } else { 3917 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3918 } 3919 3920 /* set values */ 3921 ptr_vals = recv_buffer_vals; 3922 ptr_idxs = recv_buffer_idxs_local; 3923 for (i=0;i<n_recvs;i++) { 3924 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3925 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3926 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3927 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3928 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3929 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3930 } else { 3931 /* TODO */ 3932 } 3933 ptr_idxs += olengths_idxs[i]; 3934 ptr_vals += olengths_vals[i]; 3935 } 3936 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3937 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3938 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3939 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3940 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3941 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3942 3943 #if 0 3944 if (!restrict_comm) { /* check */ 3945 Vec lvec,rvec; 3946 PetscReal infty_error; 3947 3948 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3949 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3950 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3951 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3952 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3953 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3954 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3955 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3956 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3957 } 3958 #endif 3959 3960 /* assemble new additional is (if any) */ 3961 if (nis) { 3962 PetscInt **temp_idxs,*count_is,j,psum; 3963 3964 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3965 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3966 ptr_idxs = recv_buffer_idxs_is; 3967 psum = 0; 3968 for (i=0;i<n_recvs;i++) { 3969 for (j=0;j<nis;j++) { 3970 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3971 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3972 psum += plen; 3973 ptr_idxs += plen+1; /* shift pointer to received data */ 3974 } 3975 } 3976 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3977 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3978 for (i=1;i<nis;i++) { 3979 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3980 } 3981 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3982 ptr_idxs = recv_buffer_idxs_is; 3983 for (i=0;i<n_recvs;i++) { 3984 for (j=0;j<nis;j++) { 3985 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3986 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3987 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3988 ptr_idxs += plen+1; /* shift pointer to received data */ 3989 } 3990 } 3991 for (i=0;i<nis;i++) { 3992 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3993 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3994 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3995 } 3996 ierr = PetscFree(count_is);CHKERRQ(ierr); 3997 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3998 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3999 } 4000 /* free workspace */ 4001 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4002 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4003 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4004 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4005 if (isdense) { 4006 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4007 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4008 } else { 4009 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4010 } 4011 if (nis) { 4012 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4013 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4014 } 4015 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4016 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4017 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4018 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4019 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4020 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4021 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4022 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4023 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4024 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4025 ierr = PetscFree(onodes);CHKERRQ(ierr); 4026 if (nis) { 4027 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4028 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4029 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4030 } 4031 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4032 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4033 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4034 for (i=0;i<nis;i++) { 4035 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4036 } 4037 *mat_n = NULL; 4038 } 4039 PetscFunctionReturn(0); 4040 } 4041 4042 /* temporary hack into ksp private data structure */ 4043 #include <petsc/private/kspimpl.h> 4044 4045 #undef __FUNCT__ 4046 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4047 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4048 { 4049 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4050 PC_IS *pcis = (PC_IS*)pc->data; 4051 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4052 MatNullSpace CoarseNullSpace=NULL; 4053 ISLocalToGlobalMapping coarse_islg; 4054 IS coarse_is,*isarray; 4055 PetscInt i,im_active=-1,active_procs=-1; 4056 PetscInt nis,nisdofs,nisneu; 4057 PC pc_temp; 4058 PCType coarse_pc_type; 4059 KSPType coarse_ksp_type; 4060 PetscBool multilevel_requested,multilevel_allowed; 4061 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4062 Mat t_coarse_mat_is; 4063 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4064 PetscMPIInt all_procs; 4065 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4066 PetscBool compute_vecs = PETSC_FALSE; 4067 PetscScalar *array; 4068 PetscErrorCode ierr; 4069 4070 PetscFunctionBegin; 4071 /* Assign global numbering to coarse dofs */ 4072 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4073 PetscInt ocoarse_size; 4074 compute_vecs = PETSC_TRUE; 4075 ocoarse_size = pcbddc->coarse_size; 4076 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4077 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4078 /* see if we can avoid some work */ 4079 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4080 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4081 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4082 PC pc; 4083 PetscBool isbddc; 4084 4085 /* temporary workaround since PCBDDC does not have a reset method so far */ 4086 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4087 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4088 if (isbddc) { 4089 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4090 } 4091 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4092 coarse_reuse = PETSC_FALSE; 4093 } else { /* we can safely reuse already computed coarse matrix */ 4094 coarse_reuse = PETSC_TRUE; 4095 } 4096 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4097 coarse_reuse = PETSC_FALSE; 4098 } 4099 /* reset any subassembling information */ 4100 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4101 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4102 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4103 coarse_reuse = PETSC_TRUE; 4104 } 4105 4106 /* count "active" (i.e. with positive local size) and "void" processes */ 4107 im_active = !!(pcis->n); 4108 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4109 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4110 void_procs = all_procs-active_procs; 4111 csin_type_simple = PETSC_TRUE; 4112 redist = PETSC_FALSE; 4113 if (pcbddc->current_level && void_procs) { 4114 csin_ml = PETSC_TRUE; 4115 ncoarse_ml = void_procs; 4116 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4117 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4118 csin_ds = PETSC_TRUE; 4119 ncoarse_ds = pcbddc->redistribute_coarse; 4120 redist = PETSC_TRUE; 4121 } else { 4122 csin_ds = PETSC_TRUE; 4123 ncoarse_ds = active_procs; 4124 redist = PETSC_TRUE; 4125 } 4126 } else { 4127 csin_ml = PETSC_FALSE; 4128 ncoarse_ml = all_procs; 4129 if (void_procs) { 4130 csin_ds = PETSC_TRUE; 4131 ncoarse_ds = void_procs; 4132 csin_type_simple = PETSC_FALSE; 4133 } else { 4134 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4135 csin_ds = PETSC_TRUE; 4136 ncoarse_ds = pcbddc->redistribute_coarse; 4137 redist = PETSC_TRUE; 4138 } else { 4139 csin_ds = PETSC_FALSE; 4140 ncoarse_ds = all_procs; 4141 } 4142 } 4143 } 4144 4145 /* 4146 test if we can go multilevel: three conditions must be satisfied: 4147 - we have not exceeded the number of levels requested 4148 - we can actually subassemble the active processes 4149 - we can find a suitable number of MPI processes where we can place the subassembled problem 4150 */ 4151 multilevel_allowed = PETSC_FALSE; 4152 multilevel_requested = PETSC_FALSE; 4153 if (pcbddc->current_level < pcbddc->max_levels) { 4154 multilevel_requested = PETSC_TRUE; 4155 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4156 multilevel_allowed = PETSC_FALSE; 4157 } else { 4158 multilevel_allowed = PETSC_TRUE; 4159 } 4160 } 4161 /* determine number of process partecipating to coarse solver */ 4162 if (multilevel_allowed) { 4163 ncoarse = ncoarse_ml; 4164 csin = csin_ml; 4165 redist = PETSC_FALSE; 4166 } else { 4167 ncoarse = ncoarse_ds; 4168 csin = csin_ds; 4169 } 4170 4171 /* creates temporary l2gmap and IS for coarse indexes */ 4172 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4173 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4174 4175 /* creates temporary MATIS object for coarse matrix */ 4176 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4177 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4178 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4179 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4180 #if 0 4181 { 4182 PetscViewer viewer; 4183 char filename[256]; 4184 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4185 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4186 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4187 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4188 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4189 } 4190 #endif 4191 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4192 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4193 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4194 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4195 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4196 4197 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4198 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4199 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4200 const PetscInt *idxs; 4201 ISLocalToGlobalMapping tmap; 4202 4203 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4204 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4205 /* allocate space for temporary storage */ 4206 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4207 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4208 /* allocate for IS array */ 4209 nisdofs = pcbddc->n_ISForDofsLocal; 4210 nisneu = !!pcbddc->NeumannBoundariesLocal; 4211 nis = nisdofs + nisneu; 4212 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4213 /* dofs splitting */ 4214 for (i=0;i<nisdofs;i++) { 4215 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4216 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4217 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4218 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4219 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4220 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4221 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4222 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4223 } 4224 /* neumann boundaries */ 4225 if (pcbddc->NeumannBoundariesLocal) { 4226 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4227 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4228 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4229 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4230 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4231 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4232 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4233 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4234 } 4235 /* free memory */ 4236 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4237 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4238 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4239 } else { 4240 nis = 0; 4241 nisdofs = 0; 4242 nisneu = 0; 4243 isarray = NULL; 4244 } 4245 /* destroy no longer needed map */ 4246 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4247 4248 /* restrict on coarse candidates (if needed) */ 4249 coarse_mat_is = NULL; 4250 if (csin) { 4251 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4252 if (redist) { 4253 PetscMPIInt rank; 4254 PetscInt spc,n_spc_p1,dest[1],destsize; 4255 4256 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4257 spc = active_procs/ncoarse; 4258 n_spc_p1 = active_procs%ncoarse; 4259 if (im_active) { 4260 destsize = 1; 4261 if (rank > n_spc_p1*(spc+1)-1) { 4262 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4263 } else { 4264 dest[0] = rank/(spc+1); 4265 } 4266 } else { 4267 destsize = 0; 4268 } 4269 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4270 } else if (csin_type_simple) { 4271 PetscMPIInt rank; 4272 PetscInt issize,isidx; 4273 4274 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4275 if (im_active) { 4276 issize = 1; 4277 isidx = (PetscInt)rank; 4278 } else { 4279 issize = 0; 4280 isidx = -1; 4281 } 4282 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4283 } else { /* get a suitable subassembling pattern from MATIS code */ 4284 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4285 } 4286 4287 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4288 if (!redist || ncoarse <= void_procs) { 4289 PetscInt ncoarse_cand,tissize,*nisindices; 4290 PetscInt *coarse_candidates; 4291 const PetscInt* tisindices; 4292 4293 /* get coarse candidates' ranks in pc communicator */ 4294 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4295 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4296 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4297 if (!coarse_candidates[i]) { 4298 coarse_candidates[ncoarse_cand++]=i; 4299 } 4300 } 4301 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4302 4303 4304 if (pcbddc->dbg_flag) { 4305 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4306 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4307 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4308 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4309 for (i=0;i<ncoarse_cand;i++) { 4310 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4311 } 4312 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4313 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4314 } 4315 /* shift the pattern on coarse candidates */ 4316 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4317 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4318 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4319 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4320 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4321 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4322 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4323 } 4324 if (pcbddc->dbg_flag) { 4325 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4326 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4327 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4328 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4329 } 4330 } 4331 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4332 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4333 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4334 } else { /* this is the last level, so use just receiving processes in subcomm */ 4335 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4336 } 4337 } else { 4338 if (pcbddc->dbg_flag) { 4339 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4340 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4341 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4342 } 4343 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4344 coarse_mat_is = t_coarse_mat_is; 4345 } 4346 4347 /* create local to global scatters for coarse problem */ 4348 if (compute_vecs) { 4349 PetscInt lrows; 4350 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4351 if (coarse_mat_is) { 4352 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4353 } else { 4354 lrows = 0; 4355 } 4356 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4357 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4358 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4359 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4360 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4361 } 4362 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4363 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4364 4365 /* set defaults for coarse KSP and PC */ 4366 if (multilevel_allowed) { 4367 coarse_ksp_type = KSPRICHARDSON; 4368 coarse_pc_type = PCBDDC; 4369 } else { 4370 coarse_ksp_type = KSPPREONLY; 4371 coarse_pc_type = PCREDUNDANT; 4372 } 4373 4374 /* print some info if requested */ 4375 if (pcbddc->dbg_flag) { 4376 if (!multilevel_allowed) { 4377 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4378 if (multilevel_requested) { 4379 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4380 } else if (pcbddc->max_levels) { 4381 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4382 } 4383 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4384 } 4385 } 4386 4387 /* create the coarse KSP object only once with defaults */ 4388 if (coarse_mat_is) { 4389 MatReuse coarse_mat_reuse; 4390 PetscViewer dbg_viewer = NULL; 4391 if (pcbddc->dbg_flag) { 4392 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4393 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4394 } 4395 if (!pcbddc->coarse_ksp) { 4396 char prefix[256],str_level[16]; 4397 size_t len; 4398 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4399 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4400 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4401 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4402 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4403 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4404 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4405 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4406 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4407 /* prefix */ 4408 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4409 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4410 if (!pcbddc->current_level) { 4411 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4412 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4413 } else { 4414 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4415 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4416 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4417 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4418 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4419 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4420 } 4421 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4422 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4423 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4424 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4425 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4426 /* allow user customization */ 4427 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4428 } 4429 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4430 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4431 if (nisdofs) { 4432 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4433 for (i=0;i<nisdofs;i++) { 4434 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4435 } 4436 } 4437 if (nisneu) { 4438 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4439 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4440 } 4441 4442 /* get some info after set from options */ 4443 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4444 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4445 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4446 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4447 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4448 isbddc = PETSC_FALSE; 4449 } 4450 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4451 if (isredundant) { 4452 KSP inner_ksp; 4453 PC inner_pc; 4454 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4455 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4456 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4457 } 4458 4459 /* assemble coarse matrix */ 4460 if (coarse_reuse) { 4461 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4462 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4463 coarse_mat_reuse = MAT_REUSE_MATRIX; 4464 } else { 4465 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4466 } 4467 if (isbddc || isnn) { 4468 if (pcbddc->coarsening_ratio > 1) { 4469 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4470 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4471 if (pcbddc->dbg_flag) { 4472 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4473 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4474 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4475 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4476 } 4477 } 4478 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4479 } else { 4480 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4481 coarse_mat = coarse_mat_is; 4482 } 4483 } else { 4484 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4485 } 4486 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4487 4488 /* propagate symmetry info of coarse matrix */ 4489 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4490 if (pc->pmat->symmetric_set) { 4491 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4492 } 4493 if (pc->pmat->hermitian_set) { 4494 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4495 } 4496 if (pc->pmat->spd_set) { 4497 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4498 } 4499 /* set operators */ 4500 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4501 if (pcbddc->dbg_flag) { 4502 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4503 } 4504 } else { /* processes non partecipating to coarse solver (if any) */ 4505 coarse_mat = 0; 4506 } 4507 ierr = PetscFree(isarray);CHKERRQ(ierr); 4508 #if 0 4509 { 4510 PetscViewer viewer; 4511 char filename[256]; 4512 sprintf(filename,"coarse_mat.m"); 4513 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4514 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4515 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4516 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4517 } 4518 #endif 4519 4520 /* Compute coarse null space (special handling by BDDC only) */ 4521 #if 0 4522 if (pcbddc->NullSpace) { 4523 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4524 } 4525 #endif 4526 4527 if (pcbddc->coarse_ksp) { 4528 Vec crhs,csol; 4529 PetscBool ispreonly; 4530 4531 if (CoarseNullSpace) { 4532 if (isbddc) { 4533 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4534 } else { 4535 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4536 } 4537 } 4538 /* setup coarse ksp */ 4539 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4540 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4541 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4542 /* hack */ 4543 if (!csol) { 4544 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4545 } 4546 if (!crhs) { 4547 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4548 } 4549 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4550 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4551 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4552 KSP check_ksp; 4553 KSPType check_ksp_type; 4554 PC check_pc; 4555 Vec check_vec,coarse_vec; 4556 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4557 PetscInt its; 4558 PetscBool compute_eigs; 4559 PetscReal *eigs_r,*eigs_c; 4560 PetscInt neigs; 4561 const char *prefix; 4562 4563 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4564 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4565 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4566 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4567 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4568 if (ispreonly) { 4569 check_ksp_type = KSPPREONLY; 4570 compute_eigs = PETSC_FALSE; 4571 } else { 4572 check_ksp_type = KSPGMRES; 4573 compute_eigs = PETSC_TRUE; 4574 } 4575 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4576 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4577 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4578 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4579 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4580 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4581 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4582 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4583 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4584 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4585 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4586 /* create random vec */ 4587 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4588 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4589 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4590 if (CoarseNullSpace) { 4591 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4592 } 4593 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4594 /* solve coarse problem */ 4595 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4596 if (CoarseNullSpace) { 4597 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4598 } 4599 /* set eigenvalue estimation if preonly has not been requested */ 4600 if (compute_eigs) { 4601 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4602 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4603 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4604 lambda_max = eigs_r[neigs-1]; 4605 lambda_min = eigs_r[0]; 4606 if (pcbddc->use_coarse_estimates) { 4607 if (lambda_max>lambda_min) { 4608 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4609 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4610 } 4611 } 4612 } 4613 4614 /* check coarse problem residual error */ 4615 if (pcbddc->dbg_flag) { 4616 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4617 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4618 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4619 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4620 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4621 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4622 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4623 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4624 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4625 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4626 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4627 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4628 if (compute_eigs) { 4629 PetscReal lambda_max_s,lambda_min_s; 4630 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4631 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4632 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4633 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4634 for (i=0;i<neigs;i++) { 4635 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4636 } 4637 } 4638 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4639 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4640 } 4641 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4642 if (compute_eigs) { 4643 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4644 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4645 } 4646 } 4647 } 4648 /* print additional info */ 4649 if (pcbddc->dbg_flag) { 4650 /* waits until all processes reaches this point */ 4651 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4652 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4653 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4654 } 4655 4656 /* free memory */ 4657 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4658 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4659 PetscFunctionReturn(0); 4660 } 4661 4662 #undef __FUNCT__ 4663 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4664 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4665 { 4666 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4667 PC_IS* pcis = (PC_IS*)pc->data; 4668 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4669 IS subset,subset_mult,subset_n; 4670 PetscInt local_size,coarse_size=0; 4671 PetscInt *local_primal_indices=NULL; 4672 const PetscInt *t_local_primal_indices; 4673 PetscErrorCode ierr; 4674 4675 PetscFunctionBegin; 4676 /* Compute global number of coarse dofs */ 4677 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4678 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4679 } 4680 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4681 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4682 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4683 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4684 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4685 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4686 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4687 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4688 if (local_size != pcbddc->local_primal_size) { 4689 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4690 } 4691 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4692 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4693 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4694 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4695 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4696 4697 /* check numbering */ 4698 if (pcbddc->dbg_flag) { 4699 PetscScalar coarsesum,*array; 4700 PetscInt i; 4701 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4702 4703 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4704 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4705 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4706 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4707 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4708 for (i=0;i<pcbddc->local_primal_size;i++) { 4709 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4710 } 4711 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4712 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4713 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4714 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4715 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4716 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4717 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4718 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4719 for (i=0;i<pcis->n;i++) { 4720 if (array[i] == 1.0) { 4721 set_error = PETSC_TRUE; 4722 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4723 } 4724 } 4725 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4726 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4727 for (i=0;i<pcis->n;i++) { 4728 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4729 } 4730 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4731 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4732 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4733 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4734 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4735 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4736 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4737 PetscInt *gidxs; 4738 4739 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4740 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4741 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4742 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4743 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4744 for (i=0;i<pcbddc->local_primal_size;i++) { 4745 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4746 } 4747 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4748 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4749 } 4750 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4751 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4752 } 4753 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4754 /* get back data */ 4755 *coarse_size_n = coarse_size; 4756 *local_primal_indices_n = local_primal_indices; 4757 PetscFunctionReturn(0); 4758 } 4759 4760 #undef __FUNCT__ 4761 #define __FUNCT__ "PCBDDCGlobalToLocal" 4762 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4763 { 4764 IS localis_t; 4765 PetscInt i,lsize,*idxs,n; 4766 PetscScalar *vals; 4767 PetscErrorCode ierr; 4768 4769 PetscFunctionBegin; 4770 /* get indices in local ordering exploiting local to global map */ 4771 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4772 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4773 for (i=0;i<lsize;i++) vals[i] = 1.0; 4774 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4775 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4776 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4777 if (idxs) { /* multilevel guard */ 4778 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4779 } 4780 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4781 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4782 ierr = PetscFree(vals);CHKERRQ(ierr); 4783 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4784 /* now compute set in local ordering */ 4785 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4786 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4787 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4788 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4789 for (i=0,lsize=0;i<n;i++) { 4790 if (PetscRealPart(vals[i]) > 0.5) { 4791 lsize++; 4792 } 4793 } 4794 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4795 for (i=0,lsize=0;i<n;i++) { 4796 if (PetscRealPart(vals[i]) > 0.5) { 4797 idxs[lsize++] = i; 4798 } 4799 } 4800 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4801 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4802 *localis = localis_t; 4803 PetscFunctionReturn(0); 4804 } 4805 4806 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4807 #undef __FUNCT__ 4808 #define __FUNCT__ "PCBDDCMatMult_Private" 4809 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4810 { 4811 PCBDDCChange_ctx change_ctx; 4812 PetscErrorCode ierr; 4813 4814 PetscFunctionBegin; 4815 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4816 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4817 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4818 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4819 PetscFunctionReturn(0); 4820 } 4821 4822 #undef __FUNCT__ 4823 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4824 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4825 { 4826 PCBDDCChange_ctx change_ctx; 4827 PetscErrorCode ierr; 4828 4829 PetscFunctionBegin; 4830 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4831 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4832 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4833 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4834 PetscFunctionReturn(0); 4835 } 4836 4837 #undef __FUNCT__ 4838 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4839 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4840 { 4841 PC_IS *pcis=(PC_IS*)pc->data; 4842 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4843 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4844 Mat S_j; 4845 PetscInt *used_xadj,*used_adjncy; 4846 PetscBool free_used_adj; 4847 PetscErrorCode ierr; 4848 4849 PetscFunctionBegin; 4850 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4851 free_used_adj = PETSC_FALSE; 4852 if (pcbddc->sub_schurs_layers == -1) { 4853 used_xadj = NULL; 4854 used_adjncy = NULL; 4855 } else { 4856 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4857 used_xadj = pcbddc->mat_graph->xadj; 4858 used_adjncy = pcbddc->mat_graph->adjncy; 4859 } else if (pcbddc->computed_rowadj) { 4860 used_xadj = pcbddc->mat_graph->xadj; 4861 used_adjncy = pcbddc->mat_graph->adjncy; 4862 } else { 4863 PetscBool flg_row=PETSC_FALSE; 4864 const PetscInt *xadj,*adjncy; 4865 PetscInt nvtxs; 4866 4867 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4868 if (flg_row) { 4869 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4870 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4871 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4872 free_used_adj = PETSC_TRUE; 4873 } else { 4874 pcbddc->sub_schurs_layers = -1; 4875 used_xadj = NULL; 4876 used_adjncy = NULL; 4877 } 4878 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4879 } 4880 } 4881 4882 /* setup sub_schurs data */ 4883 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4884 if (!sub_schurs->use_mumps) { 4885 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4886 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4887 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 4888 } else { 4889 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4890 PetscBool isseqaij; 4891 if (!pcbddc->use_vertices && reuse_solvers) { 4892 PetscInt n_vertices; 4893 4894 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4895 reuse_solvers = (PetscBool)!n_vertices; 4896 } 4897 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4898 if (!isseqaij) { 4899 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4900 if (matis->A == pcbddc->local_mat) { 4901 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4902 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4903 } else { 4904 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4905 } 4906 } 4907 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 4908 } 4909 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4910 4911 /* free adjacency */ 4912 if (free_used_adj) { 4913 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4914 } 4915 PetscFunctionReturn(0); 4916 } 4917 4918 #undef __FUNCT__ 4919 #define __FUNCT__ "PCBDDCInitSubSchurs" 4920 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4921 { 4922 PC_IS *pcis=(PC_IS*)pc->data; 4923 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4924 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4925 PCBDDCGraph graph; 4926 PetscErrorCode ierr; 4927 4928 PetscFunctionBegin; 4929 /* attach interface graph for determining subsets */ 4930 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4931 IS verticesIS,verticescomm; 4932 PetscInt vsize,*idxs; 4933 4934 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4935 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 4936 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4937 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 4938 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 4939 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4940 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4941 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4942 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 4943 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 4944 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4945 /* 4946 if (pcbddc->dbg_flag) { 4947 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4948 } 4949 */ 4950 } else { 4951 graph = pcbddc->mat_graph; 4952 } 4953 4954 /* sub_schurs init */ 4955 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4956 4957 /* free graph struct */ 4958 if (pcbddc->sub_schurs_rebuild) { 4959 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4960 } 4961 PetscFunctionReturn(0); 4962 } 4963