1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 10 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 const PetscInt *idxs; 14 PetscInt nz = 0; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 if (!pcbddc->zerodiag) { 19 PetscFunctionReturn(0); 20 } else { 21 ierr = ISGetLocalSize(pcbddc->zerodiag,&nz);CHKERRQ(ierr); 22 if (!nz) PetscFunctionReturn(0); 23 } 24 ierr = ISGetIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr); 25 /* TODO: add error checking 26 - avoid nested pop (or push) calls. 27 - cannot push before pop. 28 */ 29 if (pop) { 30 const PetscInt *cB0_cols; 31 PetscInt cB0_ncol; 32 const PetscScalar *cB0_vals; 33 34 /* extract B_0 */ 35 ierr = MatGetRow(pcbddc->local_mat,idxs[nz-1],&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 36 pcbddc->B0_ncol = cB0_ncol; 37 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 38 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 39 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 40 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 41 ierr = MatRestoreRow(pcbddc->local_mat,idxs[nz-1],&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 42 /* remove rows and cols from local problem */ 43 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 44 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,idxs+nz-1,1.,NULL,NULL);CHKERRQ(ierr); 45 } else { /* push */ 46 ierr = MatSetValues(pcbddc->local_mat,1,&idxs[nz-1],pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 47 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&idxs[nz-1],pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 48 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 49 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 50 } 51 PetscFunctionReturn(0); 52 } 53 54 #undef __FUNCT__ 55 #define __FUNCT__ "PCBDDCAdaptiveSelection" 56 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 57 { 58 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 59 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 60 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 61 PetscBLASInt *B_iwork,*B_ifail; 62 PetscScalar *work,lwork; 63 PetscScalar *St,*S,*eigv; 64 PetscScalar *Sarray,*Starray; 65 PetscReal *eigs,thresh; 66 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 67 PetscBool allocated_S_St; 68 #if defined(PETSC_USE_COMPLEX) 69 PetscReal *rwork; 70 #endif 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 if (!sub_schurs->use_mumps) { 75 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 76 } 77 78 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 79 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 80 } 81 82 if (pcbddc->dbg_flag) { 83 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 84 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 85 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 86 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 87 } 88 89 if (pcbddc->dbg_flag) { 90 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 91 } 92 93 /* max size of subsets */ 94 mss = 0; 95 for (i=0;i<sub_schurs->n_subs;i++) { 96 PetscInt subset_size; 97 98 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 99 mss = PetscMax(mss,subset_size); 100 } 101 102 /* min/max and threshold */ 103 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 104 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 105 nmax = PetscMax(nmin,nmax); 106 allocated_S_St = PETSC_FALSE; 107 if (nmin) { 108 allocated_S_St = PETSC_TRUE; 109 } 110 111 /* allocate lapack workspace */ 112 cum = cum2 = 0; 113 maxneigs = 0; 114 for (i=0;i<sub_schurs->n_subs;i++) { 115 PetscInt n,subset_size; 116 117 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 118 n = PetscMin(subset_size,nmax); 119 cum += subset_size; 120 cum2 += subset_size*n; 121 maxneigs = PetscMax(maxneigs,n); 122 } 123 if (mss) { 124 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 125 PetscBLASInt B_itype = 1; 126 PetscBLASInt B_N = mss; 127 PetscReal zero = 0.0; 128 PetscReal eps = 0.0; /* dlamch? */ 129 130 B_lwork = -1; 131 S = NULL; 132 St = NULL; 133 eigs = NULL; 134 eigv = NULL; 135 B_iwork = NULL; 136 B_ifail = NULL; 137 #if defined(PETSC_USE_COMPLEX) 138 rwork = NULL; 139 #endif 140 thresh = 1.0; 141 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 142 #if defined(PETSC_USE_COMPLEX) 143 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 144 #else 145 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 146 #endif 147 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 148 ierr = PetscFPTrapPop();CHKERRQ(ierr); 149 } else { 150 /* TODO */ 151 } 152 } else { 153 lwork = 0; 154 } 155 156 nv = 0; 157 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 158 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 159 } 160 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 161 if (allocated_S_St) { 162 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 163 } 164 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 165 #if defined(PETSC_USE_COMPLEX) 166 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 167 #endif 168 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 169 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 170 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 171 nv+cum,&pcbddc->adaptive_constraints_idxs, 172 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 173 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 174 175 maxneigs = 0; 176 cum = cum2 = cumarray = 0; 177 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 178 pcbddc->adaptive_constraints_data_ptr[0] = 0; 179 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 180 const PetscInt *idxs; 181 182 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 183 for (cum=0;cum<nv;cum++) { 184 pcbddc->adaptive_constraints_n[cum] = 1; 185 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 186 pcbddc->adaptive_constraints_data[cum] = 1.0; 187 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 188 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 189 } 190 cum2 = cum; 191 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 192 } 193 194 if (mss) { /* multilevel */ 195 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 196 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 197 } 198 199 for (i=0;i<sub_schurs->n_subs;i++) { 200 201 const PetscInt *idxs; 202 PetscReal infty = PETSC_MAX_REAL; 203 PetscInt j,subset_size,eigs_start = 0; 204 PetscBLASInt B_N; 205 PetscBool same_data = PETSC_FALSE; 206 207 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 208 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 209 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 210 if (sub_schurs->is_hermitian) { 211 PetscInt j,k; 212 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 213 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 214 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 215 } 216 for (j=0;j<subset_size;j++) { 217 for (k=j;k<subset_size;k++) { 218 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 219 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 220 } 221 } 222 } else { 223 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 224 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 225 } 226 } else { 227 S = Sarray + cumarray; 228 St = Starray + cumarray; 229 } 230 231 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 232 /* see if we can save some work */ 233 if (sub_schurs->n_subs == 1) { 234 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 235 } 236 237 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 238 B_neigs = 0; 239 } else { 240 /* Threshold: this is an heuristic for edges */ 241 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 242 243 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 244 PetscBLASInt B_itype = 1; 245 PetscBLASInt B_IL, B_IU; 246 PetscReal eps = -1.0; /* dlamch? */ 247 PetscInt nmin_s; 248 249 /* ask for eigenvalues larger than thresh */ 250 if (pcbddc->dbg_flag) { 251 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 252 } 253 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 254 #if defined(PETSC_USE_COMPLEX) 255 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 256 #else 257 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 258 #endif 259 ierr = PetscFPTrapPop();CHKERRQ(ierr); 260 if (B_ierr) { 261 if (B_ierr < 0 ) { 262 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 263 } else if (B_ierr <= B_N) { 264 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 265 } else { 266 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 267 } 268 } 269 270 if (B_neigs > nmax) { 271 if (pcbddc->dbg_flag) { 272 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 273 } 274 eigs_start = B_neigs -nmax; 275 B_neigs = nmax; 276 } 277 278 nmin_s = PetscMin(nmin,B_N); 279 if (B_neigs < nmin_s) { 280 PetscBLASInt B_neigs2; 281 282 B_IU = B_N - B_neigs; 283 B_IL = B_N - nmin_s + 1; 284 if (pcbddc->dbg_flag) { 285 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 286 } 287 if (sub_schurs->is_hermitian) { 288 PetscInt j; 289 for (j=0;j<subset_size;j++) { 290 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 291 } 292 for (j=0;j<subset_size;j++) { 293 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 294 } 295 } else { 296 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 297 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 298 } 299 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 300 #if defined(PETSC_USE_COMPLEX) 301 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 302 #else 303 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 304 #endif 305 ierr = PetscFPTrapPop();CHKERRQ(ierr); 306 B_neigs += B_neigs2; 307 } 308 if (B_ierr) { 309 if (B_ierr < 0 ) { 310 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 311 } else if (B_ierr <= B_N) { 312 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 313 } else { 314 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 315 } 316 } 317 if (pcbddc->dbg_flag) { 318 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 319 for (j=0;j<B_neigs;j++) { 320 if (eigs[j] == 0.0) { 321 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 322 } else { 323 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 324 } 325 } 326 } 327 } else { 328 /* TODO */ 329 } 330 } 331 maxneigs = PetscMax(B_neigs,maxneigs); 332 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 333 if (B_neigs) { 334 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 335 336 if (pcbddc->dbg_flag > 1) { 337 PetscInt ii; 338 for (ii=0;ii<B_neigs;ii++) { 339 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 340 for (j=0;j<B_N;j++) { 341 #if defined(PETSC_USE_COMPLEX) 342 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 343 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 344 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 345 #else 346 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 347 #endif 348 } 349 } 350 } 351 #if 0 352 for (j=0;j<B_neigs;j++) { 353 PetscBLASInt Blas_N,Blas_one = 1.0; 354 PetscScalar norm; 355 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 356 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 357 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 358 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 359 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 360 } else { 361 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 362 } 363 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 364 } 365 #endif 366 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 367 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 368 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 369 cum++; 370 } 371 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 372 /* shift for next computation */ 373 cumarray += subset_size*subset_size; 374 } 375 if (pcbddc->dbg_flag) { 376 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 377 } 378 379 if (mss) { 380 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 381 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 382 /* destroy matrices (junk) */ 383 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 384 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 385 } 386 if (allocated_S_St) { 387 ierr = PetscFree2(S,St);CHKERRQ(ierr); 388 } 389 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 390 #if defined(PETSC_USE_COMPLEX) 391 ierr = PetscFree(rwork);CHKERRQ(ierr); 392 #endif 393 if (pcbddc->dbg_flag) { 394 PetscInt maxneigs_r; 395 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 396 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 397 } 398 PetscFunctionReturn(0); 399 } 400 401 #undef __FUNCT__ 402 #define __FUNCT__ "PCBDDCSetUpSolvers" 403 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 404 { 405 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 406 PetscScalar *coarse_submat_vals; 407 PetscErrorCode ierr; 408 409 PetscFunctionBegin; 410 /* Setup local scatters R_to_B and (optionally) R_to_D */ 411 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 412 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 413 414 /* Setup local neumann solver ksp_R */ 415 /* PCBDDCSetUpLocalScatters should be called first! */ 416 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 417 418 /* Change global null space passed in by the user if change of basis has been requested */ 419 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 420 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 421 } 422 423 /* 424 Setup local correction and local part of coarse basis. 425 Gives back the dense local part of the coarse matrix in column major ordering 426 */ 427 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 428 429 /* Compute total number of coarse nodes and setup coarse solver */ 430 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 431 432 /* free */ 433 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 434 PetscFunctionReturn(0); 435 } 436 437 #undef __FUNCT__ 438 #define __FUNCT__ "PCBDDCResetCustomization" 439 PetscErrorCode PCBDDCResetCustomization(PC pc) 440 { 441 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 442 PetscErrorCode ierr; 443 444 PetscFunctionBegin; 445 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 446 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 447 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 448 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 449 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 450 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 451 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 452 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 454 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 455 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNCT__ 460 #define __FUNCT__ "PCBDDCResetTopography" 461 PetscErrorCode PCBDDCResetTopography(PC pc) 462 { 463 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 464 PetscErrorCode ierr; 465 466 PetscFunctionBegin; 467 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 468 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 469 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 470 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 471 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNCT__ 476 #define __FUNCT__ "PCBDDCResetSolvers" 477 PetscErrorCode PCBDDCResetSolvers(PC pc) 478 { 479 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 480 PetscScalar *array; 481 PetscErrorCode ierr; 482 483 PetscFunctionBegin; 484 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 485 if (pcbddc->coarse_phi_B) { 486 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 487 ierr = PetscFree(array);CHKERRQ(ierr); 488 } 489 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 490 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 491 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 492 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 493 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 494 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 495 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 496 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 497 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 498 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 499 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 500 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 501 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 502 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 503 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 504 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 505 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 506 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 507 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 508 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 509 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 510 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 511 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 512 ierr = ISDestroy(&pcbddc->zerodiag);CHKERRQ(ierr); 513 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 514 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 515 PetscFunctionReturn(0); 516 } 517 518 #undef __FUNCT__ 519 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 520 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 521 { 522 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 523 PC_IS *pcis = (PC_IS*)pc->data; 524 VecType impVecType; 525 PetscInt n_constraints,n_R,old_size,n_benign; 526 PetscErrorCode ierr; 527 528 PetscFunctionBegin; 529 if (!pcbddc->ConstraintMatrix) { 530 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 531 } 532 /* get sizes */ 533 n_benign = (PetscInt)(!!pcbddc->zerodiag); 534 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 535 if (n_benign && n_constraints) { 536 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot handle benign susbspace trick without change of basis"); 537 } 538 n_R = pcis->n - pcbddc->n_vertices; 539 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 540 /* local work vectors (try to avoid unneeded work)*/ 541 /* R nodes */ 542 old_size = -1; 543 if (pcbddc->vec1_R) { 544 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 545 } 546 if (n_R != old_size) { 547 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 548 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 549 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 550 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 551 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 552 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 553 } 554 /* local primal dofs */ 555 old_size = -1; 556 if (pcbddc->vec1_P) { 557 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 558 } 559 if (pcbddc->local_primal_size != old_size) { 560 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 561 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 562 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 563 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 564 } 565 /* local explicit constraints */ 566 old_size = -1; 567 if (pcbddc->vec1_C) { 568 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 569 } 570 if (n_constraints && n_constraints != old_size) { 571 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 572 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 573 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 574 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 575 } 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "PCBDDCSetUpCorrection" 581 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 582 { 583 PetscErrorCode ierr; 584 /* pointers to pcis and pcbddc */ 585 PC_IS* pcis = (PC_IS*)pc->data; 586 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 587 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 588 /* submatrices of local problem */ 589 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 590 /* submatrices of benign trick */ 591 Mat B0_R,B0_V; 592 /* submatrices of local coarse problem */ 593 Mat S_VV,S_CV,S_VC,S_CC; 594 /* working matrices */ 595 Mat C_CR; 596 /* additional working stuff */ 597 PC pc_R; 598 Mat F; 599 Vec vec_R_benign; 600 PetscBool isLU,isCHOL,isILU; 601 602 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 603 PetscScalar *work; 604 PetscInt *idx_V_B; 605 PetscInt n,n_vertices,n_constraints,n_benign; 606 PetscInt i,n_R,n_D,n_B; 607 PetscBool unsymmetric_check; 608 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 609 MatType impMatType; 610 /* some shortcuts to scalars */ 611 PetscScalar one=1.0,m_one=-1.0; 612 613 PetscFunctionBegin; 614 n_benign = (PetscInt)(!!pcbddc->zerodiag); 615 n_vertices = pcbddc->n_vertices; 616 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 617 /* Set Non-overlapping dimensions */ 618 n_B = pcis->n_B; 619 n_D = pcis->n - n_B; 620 n_R = pcis->n - n_vertices; 621 622 /* Set types for local objects needed by BDDC precondtioner */ 623 impMatType = MATSEQDENSE; 624 625 /* vertices in boundary numbering */ 626 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 627 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 628 if (i != n_vertices) { 629 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 630 } 631 632 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 633 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 634 if (!n_benign) { 635 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 636 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 637 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 638 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 639 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 640 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 641 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 642 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 643 } else { /* these wrappers are not needed with the benign trick */ 644 S_VV = S_CV = S_VC = S_CC = NULL; 645 } 646 647 unsymmetric_check = PETSC_FALSE; 648 /* allocate workspace */ 649 n = 0; 650 if (n_constraints) { 651 n += n_R*n_constraints; 652 } 653 if (n_vertices) { 654 n = PetscMax(2*n_R*n_vertices,n); 655 n = PetscMax((n_R+n_B)*n_vertices,n); 656 } 657 if (!pcbddc->symmetric_primal) { 658 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 659 unsymmetric_check = PETSC_TRUE; 660 } 661 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 662 663 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 664 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 665 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 666 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 667 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 668 if (isLU || isILU || isCHOL) { 669 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 670 } else if (sub_schurs->reuse_mumps) { 671 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 672 MatFactorType type; 673 674 F = reuse_mumps->F; 675 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 676 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 677 } else { 678 F = NULL; 679 } 680 681 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 682 if (n_constraints) { 683 Mat M1,M2,M3; 684 Mat auxmat; 685 IS is_aux; 686 PetscScalar *array,*array2; 687 688 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 689 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 690 691 /* Extract constraints on R nodes: C_{CR} */ 692 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 693 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 694 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 695 696 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 697 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 698 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 699 for (i=0;i<n_constraints;i++) { 700 const PetscScalar *row_cmat_values; 701 const PetscInt *row_cmat_indices; 702 PetscInt size_of_constraint,j; 703 704 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 705 for (j=0;j<size_of_constraint;j++) { 706 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 707 } 708 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 709 } 710 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 711 if (F) { 712 Mat B; 713 714 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 715 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 716 ierr = MatDestroy(&B);CHKERRQ(ierr); 717 } else { 718 PetscScalar *marr; 719 720 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 721 for (i=0;i<n_constraints;i++) { 722 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 723 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 724 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 725 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 726 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 727 } 728 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 729 } 730 if (!pcbddc->switch_static) { 731 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 732 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 733 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 734 for (i=0;i<n_constraints;i++) { 735 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 736 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 737 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 738 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 739 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 740 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 741 } 742 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 743 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 744 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 745 } else { 746 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 747 pcbddc->local_auxmat2 = local_auxmat2_R; 748 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 749 } 750 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 751 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 752 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 753 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 754 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 755 if (isCHOL) { 756 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 757 } else { 758 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 759 } 760 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 761 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 762 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 763 ierr = MatDestroy(&M2);CHKERRQ(ierr); 764 ierr = MatDestroy(&M3);CHKERRQ(ierr); 765 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 766 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 767 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 768 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 769 ierr = MatDestroy(&M1);CHKERRQ(ierr); 770 } 771 /* Get submatrices from subdomain matrix */ 772 if (n_vertices) { 773 IS is_aux; 774 775 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 776 IS tis; 777 778 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 779 ierr = ISSort(tis);CHKERRQ(ierr); 780 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 781 ierr = ISDestroy(&tis);CHKERRQ(ierr); 782 } else { 783 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 784 } 785 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 786 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 787 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 788 if (n_benign) { 789 Mat B0; 790 IS dummy; 791 PetscInt ii[2]; 792 793 ii[0] = 0; 794 ii[1] = pcbddc->B0_ncol; 795 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 796 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 797 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 798 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 799 ierr = MatDestroy(&B0);CHKERRQ(ierr); 800 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 801 } 802 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 803 } 804 805 /* Matrix of coarse basis functions (local) */ 806 if (pcbddc->coarse_phi_B) { 807 PetscInt on_B,on_primal,on_D=n_D; 808 if (pcbddc->coarse_phi_D) { 809 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 810 } 811 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 812 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 813 PetscScalar *marray; 814 815 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 816 ierr = PetscFree(marray);CHKERRQ(ierr); 817 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 818 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 819 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 820 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 821 } 822 } 823 824 if (!pcbddc->coarse_phi_B) { 825 PetscScalar *marray; 826 827 n = n_B*pcbddc->local_primal_size; 828 if (pcbddc->switch_static || pcbddc->dbg_flag) { 829 n += n_D*pcbddc->local_primal_size; 830 } 831 if (!pcbddc->symmetric_primal) { 832 n *= 2; 833 } 834 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 835 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 836 n = n_B*pcbddc->local_primal_size; 837 if (pcbddc->switch_static || pcbddc->dbg_flag) { 838 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 839 n += n_D*pcbddc->local_primal_size; 840 } 841 if (!pcbddc->symmetric_primal) { 842 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 843 if (pcbddc->switch_static || pcbddc->dbg_flag) { 844 n = n_B*pcbddc->local_primal_size; 845 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 846 } 847 } else { 848 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 849 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 850 if (pcbddc->switch_static || pcbddc->dbg_flag) { 851 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 852 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 853 } 854 } 855 } 856 857 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 858 /* vertices */ 859 if (n_vertices) { 860 861 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 862 863 if (n_R) { 864 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 865 Mat S_V_benign; 866 PetscBLASInt B_N,B_one = 1; 867 PetscScalar *x,*y; 868 PetscBool isseqaij; 869 870 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 871 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 873 if (F) { /* TODO could be optimized for symmetric problems */ 874 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 875 } else { 876 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 877 for (i=0;i<n_vertices;i++) { 878 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 879 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 880 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 881 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 882 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 883 } 884 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 885 } 886 if (n_benign) { 887 const PetscScalar *vals; 888 const PetscInt *idxs; 889 PetscInt n; 890 891 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign);CHKERRQ(ierr); 892 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 893 ierr = MatGetRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 894 ierr = VecSetValues(pcbddc->vec1_R,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 895 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 896 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 897 ierr = MatRestoreRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 898 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,vec_R_benign);CHKERRQ(ierr); 899 ierr = VecScale(vec_R_benign,m_one);CHKERRQ(ierr); 900 } 901 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 902 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 903 if (n_constraints) { 904 Mat B; 905 906 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 907 for (i=0;i<n_vertices;i++) { 908 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 909 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 910 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 911 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 912 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 913 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 914 } 915 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 916 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 917 ierr = MatDestroy(&B);CHKERRQ(ierr); 918 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 919 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 920 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 921 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 922 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 923 ierr = MatDestroy(&B);CHKERRQ(ierr); 924 } 925 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 926 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 927 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 928 } 929 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 930 if (n_benign) { 931 ierr = MatMatMult(B0_R,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_V_benign);CHKERRQ(ierr); 932 } 933 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 934 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 935 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 936 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 937 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 938 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 939 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 940 if (!n_benign) { 941 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 942 } else { 943 const PetscScalar *vals; 944 PetscScalar val; 945 const PetscInt *idxs; 946 PetscInt n,j; 947 948 /* Use explicit loops to populate the local coarse matrix */ 949 ierr = MatDenseGetArray(S_VVt,&x);CHKERRQ(ierr); 950 for (j=0;j<n_vertices;j++) { 951 for (i=0;i<n_vertices;i++) { 952 coarse_submat_vals[j*pcbddc->local_primal_size+i] = x[j*n_vertices+i]; 953 } 954 } 955 ierr = MatDenseRestoreArray(S_VVt,&x);CHKERRQ(ierr); 956 ierr = MatDenseGetArray(S_V_benign,&x);CHKERRQ(ierr); 957 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 958 for (j=0;j<n_vertices;j++) { 959 coarse_submat_vals[n_vertices*pcbddc->local_primal_size+j] = x[j]+vals[j]; 960 coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices] = x[j]+vals[j]; 961 } 962 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 963 ierr = MatDenseRestoreArray(S_V_benign,&x);CHKERRQ(ierr); 964 ierr = MatDestroy(&S_V_benign);CHKERRQ(ierr); 965 ierr = VecDot(vec_R_benign,vec_R_benign,&val);CHKERRQ(ierr); 966 coarse_submat_vals[pcbddc->local_primal_size*pcbddc->local_primal_size-1] = -val; 967 } 968 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 969 } else { 970 /* TODO benign case also here */ 971 if (n_benign) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not yet implemented"); 972 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 973 } 974 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 975 /* coarse basis functions */ 976 for (i=0;i<n_vertices;i++) { 977 PetscScalar *y; 978 979 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 980 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 981 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 982 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 984 y[n_B*i+idx_V_B[i]] = 1.0; 985 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 986 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 987 988 if (pcbddc->switch_static || pcbddc->dbg_flag) { 989 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 990 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 991 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 993 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 994 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 995 } 996 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 997 } 998 if (n_benign) { 999 PetscScalar *y; 1000 1001 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1002 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*n_vertices);CHKERRQ(ierr); 1003 ierr = VecScatterBegin(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004 ierr = VecScatterEnd(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1006 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1007 1008 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1009 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1010 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1011 ierr = VecScatterBegin(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1012 ierr = VecScatterEnd(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1013 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1014 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1015 } 1016 } 1017 /* if n_R == 0 the object is not destroyed */ 1018 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1019 } 1020 1021 if (n_constraints) { 1022 Mat B; 1023 1024 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1025 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1026 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1027 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1028 if (n_vertices) { 1029 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1030 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1031 } else { 1032 Mat S_VCt; 1033 1034 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1035 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1036 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1037 } 1038 } 1039 ierr = MatDestroy(&B);CHKERRQ(ierr); 1040 /* coarse basis functions */ 1041 for (i=0;i<n_constraints;i++) { 1042 PetscScalar *y; 1043 1044 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1045 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1046 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1047 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1048 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1049 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1050 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1051 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1052 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1053 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1054 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1055 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1056 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1057 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1058 } 1059 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1060 } 1061 } 1062 if (n_constraints) { 1063 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1064 } 1065 1066 if (n_benign) { 1067 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1068 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1069 ierr = VecDestroy(&vec_R_benign);CHKERRQ(ierr); 1070 } 1071 1072 /* compute other basis functions for non-symmetric problems */ 1073 if (!pcbddc->symmetric_primal) { 1074 1075 if (n_constraints) { 1076 Mat S_CCT,B_C; 1077 1078 /* this is a lazy thing */ 1079 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1080 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1081 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1082 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1083 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1084 if (n_vertices) { 1085 Mat B_V,S_VCT; 1086 1087 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1088 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1089 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1090 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1091 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1092 } 1093 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1094 } else { /* if there are no constraints, reset work */ 1095 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1096 } 1097 if (n_vertices && n_R) { 1098 Mat A_VRT; 1099 PetscScalar *marray; 1100 PetscBLASInt B_N,B_one = 1; 1101 1102 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1103 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1104 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1105 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1106 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1107 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1108 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1109 } 1110 1111 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1112 for (i=0;i<pcbddc->local_primal_size;i++) { 1113 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1114 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1115 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1116 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1117 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1118 } 1119 } else { 1120 for (i=0;i<pcbddc->local_primal_size;i++) { 1121 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1122 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1123 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1124 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1125 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1126 } 1127 } 1128 /* coarse basis functions */ 1129 for (i=0;i<pcbddc->local_primal_size;i++) { 1130 PetscScalar *y; 1131 1132 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1133 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1134 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1135 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1136 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1137 if (i<n_vertices) { 1138 y[n_B*i+idx_V_B[i]] = 1.0; 1139 } 1140 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1141 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1142 1143 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1144 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1145 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1146 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1147 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1148 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1149 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1150 } 1151 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1152 } 1153 } 1154 /* free memory */ 1155 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1156 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1157 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1158 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1159 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1160 ierr = PetscFree(work);CHKERRQ(ierr); 1161 if (n_vertices) { 1162 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1163 } 1164 if (n_constraints) { 1165 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1166 } 1167 /* Checking coarse_sub_mat and coarse basis functios */ 1168 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1169 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1170 //if (pcbddc->dbg_flag) { 1171 if (0) { 1172 Mat coarse_sub_mat; 1173 Mat AUXMAT,TM1,TM2,TM3,TM4; 1174 Mat coarse_phi_D,coarse_phi_B; 1175 Mat coarse_psi_D,coarse_psi_B; 1176 Mat A_II,A_BB,A_IB,A_BI; 1177 Mat C_B,CPHI; 1178 IS is_dummy; 1179 Vec mones; 1180 MatType checkmattype=MATSEQAIJ; 1181 PetscReal real_value; 1182 1183 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1184 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1185 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1186 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1187 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1188 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1189 if (unsymmetric_check) { 1190 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1191 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1192 } 1193 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1194 1195 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1196 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1197 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1198 if (unsymmetric_check) { 1199 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1200 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1201 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1202 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1203 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1204 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1205 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1206 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1207 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1208 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1209 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1210 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1211 } else { 1212 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1213 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1214 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1215 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1216 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1217 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1218 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1219 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1220 } 1221 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1222 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1223 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1224 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1225 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1226 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1227 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1228 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1229 1230 /* check constraints */ 1231 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1232 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1233 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1234 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1235 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1236 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1237 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1238 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1239 if (unsymmetric_check) { 1240 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1241 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1242 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1243 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1244 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1245 } 1246 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1247 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1248 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1249 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1250 1251 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1252 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1253 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1254 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1255 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1256 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1257 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1258 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1259 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1260 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1261 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1262 if (unsymmetric_check) { 1263 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1264 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1265 } 1266 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1267 } 1268 /* get back data */ 1269 *coarse_submat_vals_n = coarse_submat_vals; 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1275 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1276 { 1277 Mat *work_mat; 1278 IS isrow_s,iscol_s; 1279 PetscBool rsorted,csorted; 1280 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1281 PetscErrorCode ierr; 1282 1283 PetscFunctionBegin; 1284 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1285 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1286 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1287 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1288 1289 if (!rsorted) { 1290 const PetscInt *idxs; 1291 PetscInt *idxs_sorted,i; 1292 1293 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1294 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1295 for (i=0;i<rsize;i++) { 1296 idxs_perm_r[i] = i; 1297 } 1298 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1299 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1300 for (i=0;i<rsize;i++) { 1301 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1302 } 1303 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1304 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1305 } else { 1306 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1307 isrow_s = isrow; 1308 } 1309 1310 if (!csorted) { 1311 if (isrow == iscol) { 1312 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1313 iscol_s = isrow_s; 1314 } else { 1315 const PetscInt *idxs; 1316 PetscInt *idxs_sorted,i; 1317 1318 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1319 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1320 for (i=0;i<csize;i++) { 1321 idxs_perm_c[i] = i; 1322 } 1323 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1324 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1325 for (i=0;i<csize;i++) { 1326 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1327 } 1328 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1329 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1330 } 1331 } else { 1332 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1333 iscol_s = iscol; 1334 } 1335 1336 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1337 1338 if (!rsorted || !csorted) { 1339 Mat new_mat; 1340 IS is_perm_r,is_perm_c; 1341 1342 if (!rsorted) { 1343 PetscInt *idxs_r,i; 1344 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1345 for (i=0;i<rsize;i++) { 1346 idxs_r[idxs_perm_r[i]] = i; 1347 } 1348 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1349 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1350 } else { 1351 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1352 } 1353 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1354 1355 if (!csorted) { 1356 if (isrow_s == iscol_s) { 1357 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1358 is_perm_c = is_perm_r; 1359 } else { 1360 PetscInt *idxs_c,i; 1361 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1362 for (i=0;i<csize;i++) { 1363 idxs_c[idxs_perm_c[i]] = i; 1364 } 1365 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1366 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1367 } 1368 } else { 1369 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1370 } 1371 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1372 1373 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1374 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1375 work_mat[0] = new_mat; 1376 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1377 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1378 } 1379 1380 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1381 *B = work_mat[0]; 1382 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1383 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1384 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1390 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1391 { 1392 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1393 PC_IS* pcis = (PC_IS*)pc->data; 1394 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1395 Mat new_mat; 1396 IS is_local,is_global; 1397 PetscInt local_size; 1398 PetscBool isseqaij; 1399 PetscErrorCode ierr; 1400 1401 PetscFunctionBegin; 1402 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1403 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1404 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1405 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1406 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1407 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1408 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1409 1410 /* check */ 1411 if (pcbddc->dbg_flag) { 1412 Vec x,x_change; 1413 PetscReal error; 1414 1415 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1416 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1417 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1418 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1419 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1420 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1421 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1422 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1423 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1424 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1425 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1426 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1427 ierr = VecDestroy(&x);CHKERRQ(ierr); 1428 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1429 } 1430 1431 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1432 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1433 if (isseqaij) { 1434 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1435 } else { 1436 Mat work_mat; 1437 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1438 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1439 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1440 } 1441 if (matis->A->symmetric_set) { 1442 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1443 #if !defined(PETSC_USE_COMPLEX) 1444 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1445 #endif 1446 } 1447 /* 1448 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1449 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1450 */ 1451 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1452 PetscFunctionReturn(0); 1453 } 1454 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1457 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1458 { 1459 PC_IS* pcis = (PC_IS*)(pc->data); 1460 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1461 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1462 PetscInt *idx_R_local=NULL; 1463 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1464 PetscInt vbs,bs; 1465 PetscBT bitmask=NULL; 1466 PetscErrorCode ierr; 1467 1468 PetscFunctionBegin; 1469 /* 1470 No need to setup local scatters if 1471 - primal space is unchanged 1472 AND 1473 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1474 AND 1475 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1476 */ 1477 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1478 PetscFunctionReturn(0); 1479 } 1480 /* destroy old objects */ 1481 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1482 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1483 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1484 /* Set Non-overlapping dimensions */ 1485 n_B = pcis->n_B; 1486 n_D = pcis->n - n_B; 1487 n_vertices = pcbddc->n_vertices; 1488 1489 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1490 1491 /* create auxiliary bitmask and allocate workspace */ 1492 if (!sub_schurs->reuse_mumps) { 1493 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1494 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1495 for (i=0;i<n_vertices;i++) { 1496 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1497 } 1498 1499 for (i=0, n_R=0; i<pcis->n; i++) { 1500 if (!PetscBTLookup(bitmask,i)) { 1501 idx_R_local[n_R++] = i; 1502 } 1503 } 1504 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1505 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1506 1507 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1508 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1509 } 1510 1511 /* Block code */ 1512 vbs = 1; 1513 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1514 if (bs>1 && !(n_vertices%bs)) { 1515 PetscBool is_blocked = PETSC_TRUE; 1516 PetscInt *vary; 1517 if (!sub_schurs->reuse_mumps) { 1518 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1519 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1520 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1521 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1522 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1523 for (i=0; i<pcis->n/bs; i++) { 1524 if (vary[i]!=0 && vary[i]!=bs) { 1525 is_blocked = PETSC_FALSE; 1526 break; 1527 } 1528 } 1529 ierr = PetscFree(vary);CHKERRQ(ierr); 1530 } else { 1531 /* Verify directly the R set */ 1532 for (i=0; i<n_R/bs; i++) { 1533 PetscInt j,node=idx_R_local[bs*i]; 1534 for (j=1; j<bs; j++) { 1535 if (node != idx_R_local[bs*i+j]-j) { 1536 is_blocked = PETSC_FALSE; 1537 break; 1538 } 1539 } 1540 } 1541 } 1542 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1543 vbs = bs; 1544 for (i=0;i<n_R/vbs;i++) { 1545 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1546 } 1547 } 1548 } 1549 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1550 if (sub_schurs->reuse_mumps) { 1551 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1552 1553 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1554 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1555 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1556 reuse_mumps->is_R = pcbddc->is_R_local; 1557 } else { 1558 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1559 } 1560 1561 /* print some info if requested */ 1562 if (pcbddc->dbg_flag) { 1563 PetscInt benign; 1564 1565 benign = (PetscInt)(!!pcbddc->zerodiag); 1566 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1567 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1568 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1569 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1571 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1572 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1573 } 1574 1575 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1576 if (!sub_schurs->reuse_mumps) { 1577 IS is_aux1,is_aux2; 1578 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1579 1580 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1581 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1582 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1583 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1584 for (i=0; i<n_D; i++) { 1585 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1586 } 1587 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1588 for (i=0, j=0; i<n_R; i++) { 1589 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1590 aux_array1[j++] = i; 1591 } 1592 } 1593 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1594 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1595 for (i=0, j=0; i<n_B; i++) { 1596 if (!PetscBTLookup(bitmask,is_indices[i])) { 1597 aux_array2[j++] = i; 1598 } 1599 } 1600 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1601 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1602 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1603 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1604 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1605 1606 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1607 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1608 for (i=0, j=0; i<n_R; i++) { 1609 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1610 aux_array1[j++] = i; 1611 } 1612 } 1613 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1614 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1615 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1616 } 1617 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1618 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1619 } else { 1620 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1621 IS tis; 1622 PetscInt schur_size; 1623 1624 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1625 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1626 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1627 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1628 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1629 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1630 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1631 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1632 } 1633 } 1634 PetscFunctionReturn(0); 1635 } 1636 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1640 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1641 { 1642 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1643 PC_IS *pcis = (PC_IS*)pc->data; 1644 PC pc_temp; 1645 Mat A_RR; 1646 MatReuse reuse; 1647 PetscScalar m_one = -1.0; 1648 PetscReal value; 1649 PetscInt n_D,n_R; 1650 PetscBool use_exact,use_exact_reduced,issbaij; 1651 PetscErrorCode ierr; 1652 /* prefixes stuff */ 1653 char dir_prefix[256],neu_prefix[256],str_level[16]; 1654 size_t len; 1655 1656 PetscFunctionBegin; 1657 1658 /* compute prefixes */ 1659 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1660 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1661 if (!pcbddc->current_level) { 1662 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1663 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1664 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1665 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1666 } else { 1667 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1668 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1669 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1670 len -= 15; /* remove "pc_bddc_coarse_" */ 1671 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1672 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1673 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1674 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1675 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1676 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1677 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1678 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1679 } 1680 1681 /* DIRICHLET PROBLEM */ 1682 if (dirichlet) { 1683 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1684 if (pcbddc->local_mat->symmetric_set) { 1685 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1686 } 1687 /* Matrix for Dirichlet problem is pcis->A_II */ 1688 n_D = pcis->n - pcis->n_B; 1689 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1690 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1691 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1692 /* default */ 1693 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1694 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1695 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1696 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1697 if (issbaij) { 1698 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1699 } else { 1700 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1701 } 1702 /* Allow user's customization */ 1703 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1704 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1705 } 1706 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1707 if (sub_schurs->reuse_mumps) { 1708 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1709 1710 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1711 } 1712 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1713 if (!n_D) { 1714 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1715 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1716 } 1717 /* Set Up KSP for Dirichlet problem of BDDC */ 1718 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1719 /* set ksp_D into pcis data */ 1720 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1721 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1722 pcis->ksp_D = pcbddc->ksp_D; 1723 } 1724 1725 /* NEUMANN PROBLEM */ 1726 A_RR = 0; 1727 if (neumann) { 1728 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1729 PetscInt ibs,mbs; 1730 PetscBool issbaij; 1731 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1732 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1733 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1734 if (pcbddc->ksp_R) { /* already created ksp */ 1735 PetscInt nn_R; 1736 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1737 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1738 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1739 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1740 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1741 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1742 reuse = MAT_INITIAL_MATRIX; 1743 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1744 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1745 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1746 reuse = MAT_INITIAL_MATRIX; 1747 } else { /* safe to reuse the matrix */ 1748 reuse = MAT_REUSE_MATRIX; 1749 } 1750 } 1751 /* last check */ 1752 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1753 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1754 reuse = MAT_INITIAL_MATRIX; 1755 } 1756 } else { /* first time, so we need to create the matrix */ 1757 reuse = MAT_INITIAL_MATRIX; 1758 } 1759 /* extract A_RR */ 1760 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1761 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1762 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1763 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1764 if (matis->A == pcbddc->local_mat) { 1765 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1766 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1767 } else { 1768 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1769 } 1770 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1771 if (matis->A == pcbddc->local_mat) { 1772 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1773 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1774 } else { 1775 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1776 } 1777 } 1778 if (!sub_schurs->reuse_mumps) { 1779 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1780 if (pcbddc->local_mat->symmetric_set) { 1781 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1782 } 1783 } else { 1784 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1785 1786 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1787 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1788 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1789 } 1790 if (!pcbddc->ksp_R) { /* create object if not present */ 1791 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1792 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1793 /* default */ 1794 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1795 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1796 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1797 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1798 if (issbaij) { 1799 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1800 } else { 1801 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1802 } 1803 /* Allow user's customization */ 1804 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1805 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1806 } 1807 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1808 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1809 if (!n_R) { 1810 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1811 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1812 } 1813 /* Reuse MUMPS solver if it is present */ 1814 if (sub_schurs->reuse_mumps) { 1815 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1816 1817 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1818 } 1819 /* Set Up KSP for Neumann problem of BDDC */ 1820 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1821 } 1822 /* free Neumann problem's matrix */ 1823 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1824 1825 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1826 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1827 if (pcbddc->dbg_flag) { 1828 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1829 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1830 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1831 } 1832 if (dirichlet) { /* Dirichlet */ 1833 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1834 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1835 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1836 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1837 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1838 /* need to be adapted? */ 1839 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1840 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1841 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1842 /* print info */ 1843 if (pcbddc->dbg_flag) { 1844 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1845 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1846 } 1847 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1848 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1849 } 1850 } 1851 if (neumann) { /* Neumann */ 1852 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1853 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1854 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1855 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1856 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1857 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1858 /* need to be adapted? */ 1859 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1860 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1861 /* print info */ 1862 if (pcbddc->dbg_flag) { 1863 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1864 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1865 } 1866 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1867 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1868 } 1869 } 1870 } 1871 PetscFunctionReturn(0); 1872 } 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1876 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1877 { 1878 PetscErrorCode ierr; 1879 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1880 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1881 1882 PetscFunctionBegin; 1883 if (!sub_schurs->reuse_mumps) { 1884 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1885 } 1886 if (!pcbddc->switch_static) { 1887 if (applytranspose && pcbddc->local_auxmat1) { 1888 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1889 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1890 } 1891 if (!sub_schurs->reuse_mumps) { 1892 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1893 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1894 } else { 1895 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1896 1897 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1898 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1899 } 1900 } else { 1901 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1902 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1903 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1904 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1905 if (applytranspose && pcbddc->local_auxmat1) { 1906 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1907 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1908 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1909 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1910 } 1911 } 1912 if (!sub_schurs->reuse_mumps) { 1913 if (applytranspose) { 1914 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1915 } else { 1916 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1917 } 1918 #if defined(PETSC_HAVE_MUMPS) 1919 } else { 1920 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1921 1922 if (applytranspose) { 1923 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1924 } else { 1925 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1926 } 1927 #endif 1928 } 1929 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1930 if (!pcbddc->switch_static) { 1931 if (!sub_schurs->reuse_mumps) { 1932 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1933 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1934 } else { 1935 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1936 1937 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1938 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1939 } 1940 if (!applytranspose && pcbddc->local_auxmat1) { 1941 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1942 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1943 } 1944 } else { 1945 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1946 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1947 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1948 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1949 if (!applytranspose && pcbddc->local_auxmat1) { 1950 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1951 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1952 } 1953 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1954 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1955 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1956 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1957 } 1958 PetscFunctionReturn(0); 1959 } 1960 1961 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1964 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1965 { 1966 PetscErrorCode ierr; 1967 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1968 PC_IS* pcis = (PC_IS*) (pc->data); 1969 const PetscScalar zero = 0.0; 1970 1971 PetscFunctionBegin; 1972 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1973 if (applytranspose) { 1974 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1975 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1976 } else { 1977 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1978 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1979 } 1980 /* start communications from local primal nodes to rhs of coarse solver */ 1981 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1982 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1983 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1984 1985 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1986 /* TODO remove null space when doing multilevel */ 1987 if (pcbddc->coarse_ksp) { 1988 Vec rhs,sol; 1989 1990 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1991 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1992 if (applytranspose) { 1993 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1994 } else { 1995 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1996 } 1997 } 1998 1999 /* Local solution on R nodes */ 2000 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2001 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2002 } 2003 2004 /* communications from coarse sol to local primal nodes */ 2005 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2006 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2007 2008 /* Sum contributions from two levels */ 2009 if (applytranspose) { 2010 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2011 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2012 } else { 2013 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2014 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2015 } 2016 PetscFunctionReturn(0); 2017 } 2018 2019 #undef __FUNCT__ 2020 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2021 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2022 { 2023 PetscErrorCode ierr; 2024 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2025 PetscScalar *array; 2026 Vec from,to; 2027 2028 PetscFunctionBegin; 2029 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2030 from = pcbddc->coarse_vec; 2031 to = pcbddc->vec1_P; 2032 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2033 Vec tvec; 2034 2035 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2036 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2037 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2038 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2039 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2040 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2041 } 2042 } else { /* from local to global -> put data in coarse right hand side */ 2043 from = pcbddc->vec1_P; 2044 to = pcbddc->coarse_vec; 2045 } 2046 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2047 PetscFunctionReturn(0); 2048 } 2049 2050 #undef __FUNCT__ 2051 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2052 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2053 { 2054 PetscErrorCode ierr; 2055 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2056 PetscScalar *array; 2057 Vec from,to; 2058 2059 PetscFunctionBegin; 2060 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2061 from = pcbddc->coarse_vec; 2062 to = pcbddc->vec1_P; 2063 } else { /* from local to global -> put data in coarse right hand side */ 2064 from = pcbddc->vec1_P; 2065 to = pcbddc->coarse_vec; 2066 } 2067 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2068 if (smode == SCATTER_FORWARD) { 2069 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2070 Vec tvec; 2071 2072 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2073 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2074 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2075 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2076 } 2077 } else { 2078 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2079 ierr = VecResetArray(from);CHKERRQ(ierr); 2080 } 2081 } 2082 PetscFunctionReturn(0); 2083 } 2084 2085 /* uncomment for testing purposes */ 2086 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2089 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2090 { 2091 PetscErrorCode ierr; 2092 PC_IS* pcis = (PC_IS*)(pc->data); 2093 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2094 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2095 /* one and zero */ 2096 PetscScalar one=1.0,zero=0.0; 2097 /* space to store constraints and their local indices */ 2098 PetscScalar *constraints_data; 2099 PetscInt *constraints_idxs,*constraints_idxs_B; 2100 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2101 PetscInt *constraints_n; 2102 /* iterators */ 2103 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2104 /* BLAS integers */ 2105 PetscBLASInt lwork,lierr; 2106 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2107 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2108 /* reuse */ 2109 PetscInt olocal_primal_size,olocal_primal_size_cc; 2110 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2111 /* change of basis */ 2112 PetscBool qr_needed; 2113 PetscBT change_basis,qr_needed_idx; 2114 /* auxiliary stuff */ 2115 PetscInt *nnz,*is_indices; 2116 PetscInt ncc; 2117 /* some quantities */ 2118 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2119 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2120 2121 PetscFunctionBegin; 2122 /* Destroy Mat objects computed previously */ 2123 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2124 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2125 /* save info on constraints from previous setup (if any) */ 2126 olocal_primal_size = pcbddc->local_primal_size; 2127 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2128 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2129 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2130 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2131 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2132 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2133 2134 /* print some info */ 2135 if (pcbddc->dbg_flag) { 2136 IS vertices; 2137 PetscInt nv,nedges,nfaces; 2138 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2139 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2140 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2141 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2142 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2143 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2144 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2145 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2146 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2147 } 2148 2149 if (!pcbddc->adaptive_selection) { 2150 IS ISForVertices,*ISForFaces,*ISForEdges; 2151 MatNullSpace nearnullsp; 2152 const Vec *nearnullvecs; 2153 Vec *localnearnullsp; 2154 PetscScalar *array; 2155 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2156 PetscBool nnsp_has_cnst; 2157 /* LAPACK working arrays for SVD or POD */ 2158 PetscBool skip_lapack,boolforchange; 2159 PetscScalar *work; 2160 PetscReal *singular_vals; 2161 #if defined(PETSC_USE_COMPLEX) 2162 PetscReal *rwork; 2163 #endif 2164 #if defined(PETSC_MISSING_LAPACK_GESVD) 2165 PetscScalar *temp_basis,*correlation_mat; 2166 #else 2167 PetscBLASInt dummy_int=1; 2168 PetscScalar dummy_scalar=1.; 2169 #endif 2170 2171 /* Get index sets for faces, edges and vertices from graph */ 2172 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2173 /* free unneeded index sets */ 2174 if (!pcbddc->use_vertices) { 2175 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2176 } 2177 if (!pcbddc->use_edges) { 2178 for (i=0;i<n_ISForEdges;i++) { 2179 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2180 } 2181 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2182 n_ISForEdges = 0; 2183 } 2184 if (!pcbddc->use_faces) { 2185 for (i=0;i<n_ISForFaces;i++) { 2186 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2187 } 2188 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2189 n_ISForFaces = 0; 2190 } 2191 2192 #if defined(PETSC_USE_DEBUG) 2193 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2194 Also use_change_of_basis should be consistent among processors */ 2195 if (pcbddc->NullSpace) { 2196 PetscBool tbool[2],gbool[2]; 2197 2198 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2199 pcbddc->use_change_of_basis = PETSC_TRUE; 2200 if (!ISForEdges) { 2201 pcbddc->use_change_on_faces = PETSC_TRUE; 2202 } 2203 } 2204 tbool[0] = pcbddc->use_change_of_basis; 2205 tbool[1] = pcbddc->use_change_on_faces; 2206 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2207 pcbddc->use_change_of_basis = gbool[0]; 2208 pcbddc->use_change_on_faces = gbool[1]; 2209 } 2210 #endif 2211 2212 /* check if near null space is attached to global mat */ 2213 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2214 if (nearnullsp) { 2215 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2216 /* remove any stored info */ 2217 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2218 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2219 /* store information for BDDC solver reuse */ 2220 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2221 pcbddc->onearnullspace = nearnullsp; 2222 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2223 for (i=0;i<nnsp_size;i++) { 2224 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2225 } 2226 } else { /* if near null space is not provided BDDC uses constants by default */ 2227 nnsp_size = 0; 2228 nnsp_has_cnst = PETSC_TRUE; 2229 } 2230 /* get max number of constraints on a single cc */ 2231 max_constraints = nnsp_size; 2232 if (nnsp_has_cnst) max_constraints++; 2233 2234 /* 2235 Evaluate maximum storage size needed by the procedure 2236 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2237 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2238 There can be multiple constraints per connected component 2239 */ 2240 n_vertices = 0; 2241 if (ISForVertices) { 2242 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2243 } 2244 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2245 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2246 2247 total_counts = n_ISForFaces+n_ISForEdges; 2248 total_counts *= max_constraints; 2249 total_counts += n_vertices; 2250 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2251 2252 total_counts = 0; 2253 max_size_of_constraint = 0; 2254 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2255 IS used_is; 2256 if (i<n_ISForEdges) { 2257 used_is = ISForEdges[i]; 2258 } else { 2259 used_is = ISForFaces[i-n_ISForEdges]; 2260 } 2261 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2262 total_counts += j; 2263 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2264 } 2265 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2266 2267 /* get local part of global near null space vectors */ 2268 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2269 for (k=0;k<nnsp_size;k++) { 2270 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2271 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2272 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2273 } 2274 2275 /* whether or not to skip lapack calls */ 2276 skip_lapack = PETSC_TRUE; 2277 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2278 2279 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2280 if (!skip_lapack) { 2281 PetscScalar temp_work; 2282 2283 #if defined(PETSC_MISSING_LAPACK_GESVD) 2284 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2285 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2286 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2287 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2288 #if defined(PETSC_USE_COMPLEX) 2289 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2290 #endif 2291 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2292 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2293 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2294 lwork = -1; 2295 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2296 #if !defined(PETSC_USE_COMPLEX) 2297 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2298 #else 2299 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2300 #endif 2301 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2302 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2303 #else /* on missing GESVD */ 2304 /* SVD */ 2305 PetscInt max_n,min_n; 2306 max_n = max_size_of_constraint; 2307 min_n = max_constraints; 2308 if (max_size_of_constraint < max_constraints) { 2309 min_n = max_size_of_constraint; 2310 max_n = max_constraints; 2311 } 2312 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2313 #if defined(PETSC_USE_COMPLEX) 2314 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2315 #endif 2316 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2317 lwork = -1; 2318 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2319 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2320 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2321 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2322 #if !defined(PETSC_USE_COMPLEX) 2323 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2324 #else 2325 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2326 #endif 2327 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2328 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2329 #endif /* on missing GESVD */ 2330 /* Allocate optimal workspace */ 2331 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2332 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2333 } 2334 /* Now we can loop on constraining sets */ 2335 total_counts = 0; 2336 constraints_idxs_ptr[0] = 0; 2337 constraints_data_ptr[0] = 0; 2338 /* vertices */ 2339 if (n_vertices) { 2340 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2341 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2342 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2343 for (i=0;i<n_vertices;i++) { 2344 constraints_n[total_counts] = 1; 2345 constraints_data[total_counts] = 1.0; 2346 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2347 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2348 total_counts++; 2349 } 2350 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2351 PetscBool used_vertex; 2352 for (i=0;i<n_vertices;i++) { 2353 used_vertex = PETSC_FALSE; 2354 k = 0; 2355 while (!used_vertex && k<nnsp_size) { 2356 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2357 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2358 constraints_n[total_counts] = 1; 2359 constraints_idxs[total_counts] = is_indices[i]; 2360 constraints_data[total_counts] = 1.0; 2361 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2362 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2363 total_counts++; 2364 used_vertex = PETSC_TRUE; 2365 } 2366 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2367 k++; 2368 } 2369 } 2370 } 2371 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2372 n_vertices = total_counts; 2373 } 2374 2375 /* edges and faces */ 2376 total_counts_cc = total_counts; 2377 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2378 IS used_is; 2379 PetscBool idxs_copied = PETSC_FALSE; 2380 2381 if (ncc<n_ISForEdges) { 2382 used_is = ISForEdges[ncc]; 2383 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2384 } else { 2385 used_is = ISForFaces[ncc-n_ISForEdges]; 2386 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2387 } 2388 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2389 2390 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2391 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2392 /* change of basis should not be performed on local periodic nodes */ 2393 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2394 if (nnsp_has_cnst) { 2395 PetscScalar quad_value; 2396 2397 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2398 idxs_copied = PETSC_TRUE; 2399 2400 if (!pcbddc->use_nnsp_true) { 2401 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2402 } else { 2403 quad_value = 1.0; 2404 } 2405 for (j=0;j<size_of_constraint;j++) { 2406 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2407 } 2408 temp_constraints++; 2409 total_counts++; 2410 } 2411 for (k=0;k<nnsp_size;k++) { 2412 PetscReal real_value; 2413 PetscScalar *ptr_to_data; 2414 2415 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2416 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2417 for (j=0;j<size_of_constraint;j++) { 2418 ptr_to_data[j] = array[is_indices[j]]; 2419 } 2420 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2421 /* check if array is null on the connected component */ 2422 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2423 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2424 if (real_value > 0.0) { /* keep indices and values */ 2425 temp_constraints++; 2426 total_counts++; 2427 if (!idxs_copied) { 2428 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2429 idxs_copied = PETSC_TRUE; 2430 } 2431 } 2432 } 2433 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2434 valid_constraints = temp_constraints; 2435 if (!pcbddc->use_nnsp_true && temp_constraints) { 2436 if (temp_constraints == 1) { /* just normalize the constraint */ 2437 PetscScalar norm,*ptr_to_data; 2438 2439 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2440 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2441 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2442 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2443 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2444 } else { /* perform SVD */ 2445 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2446 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2447 2448 #if defined(PETSC_MISSING_LAPACK_GESVD) 2449 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2450 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2451 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2452 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2453 from that computed using LAPACKgesvd 2454 -> This is due to a different computation of eigenvectors in LAPACKheev 2455 -> The quality of the POD-computed basis will be the same */ 2456 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2457 /* Store upper triangular part of correlation matrix */ 2458 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2459 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2460 for (j=0;j<temp_constraints;j++) { 2461 for (k=0;k<j+1;k++) { 2462 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2463 } 2464 } 2465 /* compute eigenvalues and eigenvectors of correlation matrix */ 2466 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2467 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2468 #if !defined(PETSC_USE_COMPLEX) 2469 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2470 #else 2471 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2472 #endif 2473 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2474 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2475 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2476 j = 0; 2477 while (j < temp_constraints && singular_vals[j] < tol) j++; 2478 total_counts = total_counts-j; 2479 valid_constraints = temp_constraints-j; 2480 /* scale and copy POD basis into used quadrature memory */ 2481 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2482 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2483 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2484 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2485 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2486 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2487 if (j<temp_constraints) { 2488 PetscInt ii; 2489 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2490 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2491 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2492 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2493 for (k=0;k<temp_constraints-j;k++) { 2494 for (ii=0;ii<size_of_constraint;ii++) { 2495 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2496 } 2497 } 2498 } 2499 #else /* on missing GESVD */ 2500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2501 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2502 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2503 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2504 #if !defined(PETSC_USE_COMPLEX) 2505 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2506 #else 2507 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2508 #endif 2509 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2510 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2511 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2512 k = temp_constraints; 2513 if (k > size_of_constraint) k = size_of_constraint; 2514 j = 0; 2515 while (j < k && singular_vals[k-j-1] < tol) j++; 2516 valid_constraints = k-j; 2517 total_counts = total_counts-temp_constraints+valid_constraints; 2518 #endif /* on missing GESVD */ 2519 } 2520 } 2521 /* update pointers information */ 2522 if (valid_constraints) { 2523 constraints_n[total_counts_cc] = valid_constraints; 2524 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2525 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2526 /* set change_of_basis flag */ 2527 if (boolforchange) { 2528 PetscBTSet(change_basis,total_counts_cc); 2529 } 2530 total_counts_cc++; 2531 } 2532 } 2533 /* free workspace */ 2534 if (!skip_lapack) { 2535 ierr = PetscFree(work);CHKERRQ(ierr); 2536 #if defined(PETSC_USE_COMPLEX) 2537 ierr = PetscFree(rwork);CHKERRQ(ierr); 2538 #endif 2539 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2540 #if defined(PETSC_MISSING_LAPACK_GESVD) 2541 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2542 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2543 #endif 2544 } 2545 for (k=0;k<nnsp_size;k++) { 2546 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2547 } 2548 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2549 /* free index sets of faces, edges and vertices */ 2550 for (i=0;i<n_ISForFaces;i++) { 2551 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2552 } 2553 if (n_ISForFaces) { 2554 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2555 } 2556 for (i=0;i<n_ISForEdges;i++) { 2557 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2558 } 2559 if (n_ISForEdges) { 2560 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2561 } 2562 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2563 } else { 2564 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2565 2566 total_counts = 0; 2567 n_vertices = 0; 2568 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2569 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2570 } 2571 max_constraints = 0; 2572 total_counts_cc = 0; 2573 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2574 total_counts += pcbddc->adaptive_constraints_n[i]; 2575 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2576 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2577 } 2578 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2579 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2580 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2581 constraints_data = pcbddc->adaptive_constraints_data; 2582 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2583 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2584 total_counts_cc = 0; 2585 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2586 if (pcbddc->adaptive_constraints_n[i]) { 2587 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2588 } 2589 } 2590 #if 0 2591 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2592 for (i=0;i<total_counts_cc;i++) { 2593 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2594 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2595 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2596 printf(" %d",constraints_idxs[j]); 2597 } 2598 printf("\n"); 2599 printf("number of cc: %d\n",constraints_n[i]); 2600 } 2601 for (i=0;i<n_vertices;i++) { 2602 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2603 } 2604 for (i=0;i<sub_schurs->n_subs;i++) { 2605 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2606 } 2607 #endif 2608 2609 max_size_of_constraint = 0; 2610 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2611 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2612 /* Change of basis */ 2613 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2614 if (pcbddc->use_change_of_basis) { 2615 for (i=0;i<sub_schurs->n_subs;i++) { 2616 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2617 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2618 } 2619 } 2620 } 2621 } 2622 pcbddc->local_primal_size = total_counts; 2623 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2624 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2625 2626 /* map constraints_idxs in boundary numbering */ 2627 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2628 if (i != constraints_idxs_ptr[total_counts_cc]) { 2629 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2630 } 2631 2632 /* Create constraint matrix */ 2633 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2634 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2635 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2636 2637 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2638 /* determine if a QR strategy is needed for change of basis */ 2639 qr_needed = PETSC_FALSE; 2640 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2641 total_primal_vertices=0; 2642 pcbddc->local_primal_size_cc = 0; 2643 for (i=0;i<total_counts_cc;i++) { 2644 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2645 if (size_of_constraint == 1) { 2646 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2647 pcbddc->local_primal_size_cc += 1; 2648 } else if (PetscBTLookup(change_basis,i)) { 2649 for (k=0;k<constraints_n[i];k++) { 2650 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2651 } 2652 pcbddc->local_primal_size_cc += constraints_n[i]; 2653 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2654 PetscBTSet(qr_needed_idx,i); 2655 qr_needed = PETSC_TRUE; 2656 } 2657 } else { 2658 pcbddc->local_primal_size_cc += 1; 2659 } 2660 } 2661 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2662 pcbddc->n_vertices = total_primal_vertices; 2663 /* permute indices in order to have a sorted set of vertices */ 2664 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2665 2666 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2667 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2668 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2669 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2670 2671 /* nonzero structure of constraint matrix */ 2672 /* and get reference dof for local constraints */ 2673 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2674 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2675 2676 j = total_primal_vertices; 2677 total_counts = total_primal_vertices; 2678 cum = total_primal_vertices; 2679 for (i=n_vertices;i<total_counts_cc;i++) { 2680 if (!PetscBTLookup(change_basis,i)) { 2681 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2682 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2683 cum++; 2684 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2685 for (k=0;k<constraints_n[i];k++) { 2686 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2687 nnz[j+k] = size_of_constraint; 2688 } 2689 j += constraints_n[i]; 2690 } 2691 } 2692 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2693 ierr = PetscFree(nnz);CHKERRQ(ierr); 2694 2695 /* set values in constraint matrix */ 2696 for (i=0;i<total_primal_vertices;i++) { 2697 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2698 } 2699 total_counts = total_primal_vertices; 2700 for (i=n_vertices;i<total_counts_cc;i++) { 2701 if (!PetscBTLookup(change_basis,i)) { 2702 PetscInt *cols; 2703 2704 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2705 cols = constraints_idxs+constraints_idxs_ptr[i]; 2706 for (k=0;k<constraints_n[i];k++) { 2707 PetscInt row = total_counts+k; 2708 PetscScalar *vals; 2709 2710 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2711 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2712 } 2713 total_counts += constraints_n[i]; 2714 } 2715 } 2716 /* assembling */ 2717 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2718 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2719 2720 /* 2721 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2722 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2723 */ 2724 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2725 if (pcbddc->use_change_of_basis) { 2726 /* dual and primal dofs on a single cc */ 2727 PetscInt dual_dofs,primal_dofs; 2728 /* working stuff for GEQRF */ 2729 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2730 PetscBLASInt lqr_work; 2731 /* working stuff for UNGQR */ 2732 PetscScalar *gqr_work,lgqr_work_t; 2733 PetscBLASInt lgqr_work; 2734 /* working stuff for TRTRS */ 2735 PetscScalar *trs_rhs; 2736 PetscBLASInt Blas_NRHS; 2737 /* pointers for values insertion into change of basis matrix */ 2738 PetscInt *start_rows,*start_cols; 2739 PetscScalar *start_vals; 2740 /* working stuff for values insertion */ 2741 PetscBT is_primal; 2742 PetscInt *aux_primal_numbering_B; 2743 /* matrix sizes */ 2744 PetscInt global_size,local_size; 2745 /* temporary change of basis */ 2746 Mat localChangeOfBasisMatrix; 2747 /* extra space for debugging */ 2748 PetscScalar *dbg_work; 2749 2750 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2751 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2752 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2753 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2754 /* nonzeros for local mat */ 2755 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2756 for (i=0;i<pcis->n;i++) nnz[i]=1; 2757 for (i=n_vertices;i<total_counts_cc;i++) { 2758 if (PetscBTLookup(change_basis,i)) { 2759 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2760 if (PetscBTLookup(qr_needed_idx,i)) { 2761 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2762 } else { 2763 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2764 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2765 } 2766 } 2767 } 2768 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2769 ierr = PetscFree(nnz);CHKERRQ(ierr); 2770 /* Set initial identity in the matrix */ 2771 for (i=0;i<pcis->n;i++) { 2772 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2773 } 2774 2775 if (pcbddc->dbg_flag) { 2776 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2777 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2778 } 2779 2780 2781 /* Now we loop on the constraints which need a change of basis */ 2782 /* 2783 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2784 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2785 2786 Basic blocks of change of basis matrix T computed by 2787 2788 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2789 2790 | 1 0 ... 0 s_1/S | 2791 | 0 1 ... 0 s_2/S | 2792 | ... | 2793 | 0 ... 1 s_{n-1}/S | 2794 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2795 2796 with S = \sum_{i=1}^n s_i^2 2797 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2798 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2799 2800 - QR decomposition of constraints otherwise 2801 */ 2802 if (qr_needed) { 2803 /* space to store Q */ 2804 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2805 /* first we issue queries for optimal work */ 2806 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2807 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2808 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2809 lqr_work = -1; 2810 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2811 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2812 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2813 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2814 lgqr_work = -1; 2815 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2816 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2817 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2818 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2819 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2820 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2821 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2822 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2823 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2824 /* array to store scaling factors for reflectors */ 2825 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2826 /* array to store rhs and solution of triangular solver */ 2827 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2828 /* allocating workspace for check */ 2829 if (pcbddc->dbg_flag) { 2830 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2831 } 2832 } 2833 /* array to store whether a node is primal or not */ 2834 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2835 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2836 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2837 if (i != total_primal_vertices) { 2838 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2839 } 2840 for (i=0;i<total_primal_vertices;i++) { 2841 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2842 } 2843 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2844 2845 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2846 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2847 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2848 if (PetscBTLookup(change_basis,total_counts)) { 2849 /* get constraint info */ 2850 primal_dofs = constraints_n[total_counts]; 2851 dual_dofs = size_of_constraint-primal_dofs; 2852 2853 if (pcbddc->dbg_flag) { 2854 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2855 } 2856 2857 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2858 2859 /* copy quadrature constraints for change of basis check */ 2860 if (pcbddc->dbg_flag) { 2861 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2862 } 2863 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2864 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2865 2866 /* compute QR decomposition of constraints */ 2867 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2868 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2869 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2870 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2871 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2872 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2873 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2874 2875 /* explictly compute R^-T */ 2876 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2877 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2878 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2879 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2880 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2881 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2882 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2883 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2884 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2885 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2886 2887 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2888 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2889 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2890 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2891 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2892 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2893 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2894 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2895 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2896 2897 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2898 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2899 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2900 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2901 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2902 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2903 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2904 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2905 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2906 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2907 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2908 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2909 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2910 2911 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2912 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2913 /* insert cols for primal dofs */ 2914 for (j=0;j<primal_dofs;j++) { 2915 start_vals = &qr_basis[j*size_of_constraint]; 2916 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2917 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2918 } 2919 /* insert cols for dual dofs */ 2920 for (j=0,k=0;j<dual_dofs;k++) { 2921 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2922 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2923 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2924 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2925 j++; 2926 } 2927 } 2928 2929 /* check change of basis */ 2930 if (pcbddc->dbg_flag) { 2931 PetscInt ii,jj; 2932 PetscBool valid_qr=PETSC_TRUE; 2933 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2934 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2935 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2936 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2937 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2938 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2939 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2940 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2941 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2942 for (jj=0;jj<size_of_constraint;jj++) { 2943 for (ii=0;ii<primal_dofs;ii++) { 2944 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2945 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2946 } 2947 } 2948 if (!valid_qr) { 2949 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2950 for (jj=0;jj<size_of_constraint;jj++) { 2951 for (ii=0;ii<primal_dofs;ii++) { 2952 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2953 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2954 } 2955 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2956 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2957 } 2958 } 2959 } 2960 } else { 2961 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2962 } 2963 } 2964 } else { /* simple transformation block */ 2965 PetscInt row,col; 2966 PetscScalar val,norm; 2967 2968 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2969 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2970 for (j=0;j<size_of_constraint;j++) { 2971 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2972 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2973 if (!PetscBTLookup(is_primal,row_B)) { 2974 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2975 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2976 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2977 } else { 2978 for (k=0;k<size_of_constraint;k++) { 2979 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2980 if (row != col) { 2981 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2982 } else { 2983 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2984 } 2985 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2986 } 2987 } 2988 } 2989 if (pcbddc->dbg_flag) { 2990 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2991 } 2992 } 2993 } else { 2994 if (pcbddc->dbg_flag) { 2995 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2996 } 2997 } 2998 } 2999 3000 /* free workspace */ 3001 if (qr_needed) { 3002 if (pcbddc->dbg_flag) { 3003 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3004 } 3005 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3006 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3007 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3008 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3009 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3010 } 3011 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3012 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3013 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3014 3015 /* assembling of global change of variable */ 3016 { 3017 Mat tmat; 3018 PetscInt bs; 3019 3020 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3021 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3022 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3023 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3024 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3025 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3026 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3027 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3028 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3029 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3030 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3031 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3032 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3033 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3034 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3035 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3036 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3037 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3038 } 3039 /* check */ 3040 if (pcbddc->dbg_flag) { 3041 PetscReal error; 3042 Vec x,x_change; 3043 3044 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3045 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3046 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3047 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3048 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3049 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3050 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3051 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3052 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3053 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3054 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3055 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3056 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3057 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3058 ierr = VecDestroy(&x);CHKERRQ(ierr); 3059 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3060 } 3061 3062 /* adapt sub_schurs computed (if any) */ 3063 if (pcbddc->use_deluxe_scaling) { 3064 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3065 if (sub_schurs->S_Ej_all) { 3066 Mat S_new,tmat; 3067 IS is_all_N; 3068 3069 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3070 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3071 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3072 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3073 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3074 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3075 sub_schurs->S_Ej_all = S_new; 3076 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3077 if (sub_schurs->sum_S_Ej_all) { 3078 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3079 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3080 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3081 sub_schurs->sum_S_Ej_all = S_new; 3082 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3083 } 3084 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3085 } 3086 } 3087 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3088 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3089 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3090 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3091 } 3092 3093 /* set up change of basis context */ 3094 if (pcbddc->ChangeOfBasisMatrix) { 3095 PCBDDCChange_ctx change_ctx; 3096 3097 if (!pcbddc->new_global_mat) { 3098 PetscInt global_size,local_size; 3099 3100 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3101 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3102 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3103 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3104 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3105 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3106 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3107 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3108 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3109 } else { 3110 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3111 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3112 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3113 } 3114 if (!pcbddc->user_ChangeOfBasisMatrix) { 3115 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3116 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3117 } else { 3118 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3119 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3120 } 3121 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3122 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3123 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3124 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3125 } 3126 3127 /* add pressure dof to set of primal nodes for numbering purposes */ 3128 if (pcbddc->zerodiag) { 3129 const PetscInt *idxs; 3130 PetscInt nz; 3131 3132 ierr = ISGetLocalSize(pcbddc->zerodiag,&nz);CHKERRQ(ierr); 3133 ierr = ISGetIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr); 3134 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = idxs[nz-1]; 3135 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size_cc] = idxs[nz-1]; 3136 ierr = ISRestoreIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr); 3137 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3138 pcbddc->local_primal_size_cc++; 3139 pcbddc->local_primal_size++; 3140 } 3141 3142 /* check if a new primal space has been introduced (also take into account benign trick) */ 3143 pcbddc->new_primal_space_local = PETSC_TRUE; 3144 if (olocal_primal_size == pcbddc->local_primal_size) { 3145 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3146 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3147 if (!pcbddc->new_primal_space_local) { 3148 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3149 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3150 } 3151 } 3152 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3153 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3154 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3155 3156 /* flush dbg viewer */ 3157 if (pcbddc->dbg_flag) { 3158 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3159 } 3160 3161 /* free workspace */ 3162 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3163 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3164 if (!pcbddc->adaptive_selection) { 3165 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3166 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3167 } else { 3168 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3169 pcbddc->adaptive_constraints_idxs_ptr, 3170 pcbddc->adaptive_constraints_data_ptr, 3171 pcbddc->adaptive_constraints_idxs, 3172 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3173 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3174 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3175 } 3176 PetscFunctionReturn(0); 3177 } 3178 3179 #undef __FUNCT__ 3180 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3181 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3182 { 3183 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3184 PC_IS *pcis = (PC_IS*)pc->data; 3185 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3186 PetscInt ierr,i,vertex_size,N; 3187 PetscViewer viewer=pcbddc->dbg_viewer; 3188 3189 PetscFunctionBegin; 3190 /* Reset previously computed graph */ 3191 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3192 /* Init local Graph struct */ 3193 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3194 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3195 3196 /* Check validity of the csr graph passed in by the user */ 3197 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3198 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3199 } 3200 3201 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3202 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3203 PetscInt *xadj,*adjncy; 3204 PetscInt nvtxs; 3205 PetscBool flg_row=PETSC_FALSE; 3206 3207 if (pcbddc->use_local_adj) { 3208 3209 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3210 if (flg_row) { 3211 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3212 pcbddc->computed_rowadj = PETSC_TRUE; 3213 } 3214 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3215 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3216 IS is_dummy; 3217 ISLocalToGlobalMapping l2gmap_dummy; 3218 PetscInt j,sum; 3219 PetscInt *cxadj,*cadjncy; 3220 const PetscInt *idxs; 3221 PCBDDCGraph graph; 3222 PetscBT is_on_boundary; 3223 3224 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3225 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3226 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3227 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3228 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3229 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3230 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3231 if (flg_row) { 3232 graph->xadj = xadj; 3233 graph->adjncy = adjncy; 3234 } 3235 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3236 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3237 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3238 3239 if (pcbddc->dbg_flag) { 3240 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3241 for (i=0;i<graph->ncc;i++) { 3242 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3243 } 3244 } 3245 3246 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3247 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3248 for (i=0;i<pcis->n_B;i++) { 3249 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3250 } 3251 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3252 3253 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3254 sum = 0; 3255 for (i=0;i<graph->ncc;i++) { 3256 PetscInt sizecc = 0; 3257 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3258 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3259 sizecc++; 3260 } 3261 } 3262 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3263 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3264 cxadj[graph->queue[j]] = sizecc; 3265 } 3266 } 3267 sum += sizecc*sizecc; 3268 } 3269 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3270 sum = 0; 3271 for (i=0;i<pcis->n;i++) { 3272 PetscInt temp = cxadj[i]; 3273 cxadj[i] = sum; 3274 sum += temp; 3275 } 3276 cxadj[pcis->n] = sum; 3277 for (i=0;i<graph->ncc;i++) { 3278 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3279 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3280 PetscInt k,sizecc = 0; 3281 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3282 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3283 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3284 sizecc++; 3285 } 3286 } 3287 } 3288 } 3289 } 3290 if (sum) { 3291 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3292 } else { 3293 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3294 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3295 } 3296 graph->xadj = 0; 3297 graph->adjncy = 0; 3298 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3299 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3300 } 3301 } 3302 if (pcbddc->dbg_flag) { 3303 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3304 } 3305 3306 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3307 vertex_size = 1; 3308 if (pcbddc->user_provided_isfordofs) { 3309 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3310 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3311 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3312 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3313 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3314 } 3315 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3316 pcbddc->n_ISForDofs = 0; 3317 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3318 } 3319 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3320 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3321 } else { 3322 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3323 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3324 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3325 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3326 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3327 } 3328 } 3329 } 3330 3331 /* Setup of Graph */ 3332 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3333 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3334 } 3335 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3336 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3337 } 3338 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3339 3340 /* Graph's connected components analysis */ 3341 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3342 3343 /* print some info to stdout */ 3344 if (pcbddc->dbg_flag) { 3345 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3346 } 3347 3348 /* mark topography has done */ 3349 pcbddc->recompute_topography = PETSC_FALSE; 3350 PetscFunctionReturn(0); 3351 } 3352 3353 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3354 #undef __FUNCT__ 3355 #define __FUNCT__ "PCBDDCSubsetNumbering" 3356 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3357 { 3358 PetscSF sf; 3359 PetscLayout map; 3360 const PetscInt *idxs; 3361 PetscInt *leaf_data,*root_data,*gidxs; 3362 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3363 PetscInt n_n,nlocals,start,first_index; 3364 PetscMPIInt commsize; 3365 PetscBool first_found; 3366 PetscErrorCode ierr; 3367 3368 PetscFunctionBegin; 3369 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3370 if (subset_mult) { 3371 PetscCheckSameComm(subset,1,subset_mult,2); 3372 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3373 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3374 } 3375 /* create workspace layout for computing global indices of subset */ 3376 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3377 lbounds[0] = lbounds[1] = 0; 3378 for (i=0;i<n;i++) { 3379 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3380 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3381 } 3382 lbounds[0] = -lbounds[0]; 3383 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3384 gbounds[0] = -gbounds[0]; 3385 N = gbounds[1] - gbounds[0] + 1; 3386 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3387 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3388 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3389 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3390 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3391 3392 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3393 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3394 if (subset_mult) { 3395 const PetscInt* idxs_mult; 3396 3397 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3398 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3399 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3400 } else { 3401 for (i=0;i<n;i++) leaf_data[i] = 1; 3402 } 3403 /* local size of new subset */ 3404 n_n = 0; 3405 for (i=0;i<n;i++) n_n += leaf_data[i]; 3406 3407 /* global indexes in layout */ 3408 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3409 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3410 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3411 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3412 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3413 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3414 3415 /* reduce from leaves to roots */ 3416 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3417 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3418 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3419 3420 /* count indexes in local part of layout */ 3421 nlocals = 0; 3422 first_index = -1; 3423 first_found = PETSC_FALSE; 3424 for (i=0;i<Nl;i++) { 3425 if (!first_found && root_data[i]) { 3426 first_found = PETSC_TRUE; 3427 first_index = i; 3428 } 3429 nlocals += root_data[i]; 3430 } 3431 3432 /* cumulative of number of indexes and size of subset without holes */ 3433 #if defined(PETSC_HAVE_MPI_EXSCAN) 3434 start = 0; 3435 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3436 #else 3437 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3438 start = start-nlocals; 3439 #endif 3440 3441 if (N_n) { /* compute total size of new subset if requested */ 3442 *N_n = start + nlocals; 3443 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3444 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3445 } 3446 3447 /* adapt root data with cumulative */ 3448 if (first_found) { 3449 PetscInt old_index; 3450 3451 root_data[first_index] += start; 3452 old_index = first_index; 3453 for (i=first_index+1;i<Nl;i++) { 3454 if (root_data[i]) { 3455 root_data[i] += root_data[old_index]; 3456 old_index = i; 3457 } 3458 } 3459 } 3460 3461 /* from roots to leaves */ 3462 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3463 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3464 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3465 3466 /* create new IS with global indexes without holes */ 3467 if (subset_mult) { 3468 const PetscInt* idxs_mult; 3469 PetscInt cum; 3470 3471 cum = 0; 3472 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3473 for (i=0;i<n;i++) { 3474 PetscInt j; 3475 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3476 } 3477 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3478 } else { 3479 for (i=0;i<n;i++) { 3480 gidxs[i] = leaf_data[i]-1; 3481 } 3482 } 3483 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3484 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3485 PetscFunctionReturn(0); 3486 } 3487 3488 #undef __FUNCT__ 3489 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3490 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3491 { 3492 PetscInt i,j; 3493 PetscScalar *alphas; 3494 PetscErrorCode ierr; 3495 3496 PetscFunctionBegin; 3497 /* this implements stabilized Gram-Schmidt */ 3498 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3499 for (i=0;i<n;i++) { 3500 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3501 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3502 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3503 } 3504 ierr = PetscFree(alphas);CHKERRQ(ierr); 3505 PetscFunctionReturn(0); 3506 } 3507 3508 #undef __FUNCT__ 3509 #define __FUNCT__ "MatISGetSubassemblingPattern" 3510 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3511 { 3512 IS ranks_send_to; 3513 PetscInt n_neighs,*neighs,*n_shared,**shared; 3514 PetscMPIInt size,rank,color; 3515 PetscInt *xadj,*adjncy; 3516 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3517 PetscInt i,local_size,threshold=0; 3518 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3519 PetscSubcomm subcomm; 3520 PetscErrorCode ierr; 3521 3522 PetscFunctionBegin; 3523 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3524 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3525 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3526 3527 /* Get info on mapping */ 3528 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3529 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3530 3531 /* build local CSR graph of subdomains' connectivity */ 3532 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3533 xadj[0] = 0; 3534 xadj[1] = PetscMax(n_neighs-1,0); 3535 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3536 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3537 3538 if (threshold) { 3539 PetscInt xadj_count = 0; 3540 for (i=1;i<n_neighs;i++) { 3541 if (n_shared[i] > threshold) { 3542 adjncy[xadj_count] = neighs[i]; 3543 adjncy_wgt[xadj_count] = n_shared[i]; 3544 xadj_count++; 3545 } 3546 } 3547 xadj[1] = xadj_count; 3548 } else { 3549 if (xadj[1]) { 3550 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3551 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3552 } 3553 } 3554 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3555 if (use_square) { 3556 for (i=0;i<xadj[1];i++) { 3557 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3558 } 3559 } 3560 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3561 3562 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3563 3564 /* 3565 Restrict work on active processes only. 3566 */ 3567 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3568 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3569 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3570 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3571 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3572 if (color) { 3573 ierr = PetscFree(xadj);CHKERRQ(ierr); 3574 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3575 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3576 } else { 3577 Mat subdomain_adj; 3578 IS new_ranks,new_ranks_contig; 3579 MatPartitioning partitioner; 3580 PetscInt prank,rstart=0,rend=0; 3581 PetscInt *is_indices,*oldranks; 3582 PetscBool aggregate; 3583 3584 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3585 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3586 prank = rank; 3587 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3588 /* 3589 for (i=0;i<size;i++) { 3590 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3591 } 3592 */ 3593 for (i=0;i<xadj[1];i++) { 3594 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3595 } 3596 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3597 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3598 if (aggregate) { 3599 PetscInt lrows,row,ncols,*cols; 3600 PetscMPIInt nrank; 3601 PetscScalar *vals; 3602 3603 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3604 lrows = 0; 3605 if (nrank<redprocs) { 3606 lrows = size/redprocs; 3607 if (nrank<size%redprocs) lrows++; 3608 } 3609 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3610 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3611 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3612 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3613 row = nrank; 3614 ncols = xadj[1]-xadj[0]; 3615 cols = adjncy; 3616 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3617 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3618 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3619 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3620 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3621 ierr = PetscFree(xadj);CHKERRQ(ierr); 3622 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3623 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3624 ierr = PetscFree(vals);CHKERRQ(ierr); 3625 } else { 3626 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3627 } 3628 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3629 3630 /* Partition */ 3631 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3632 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3633 if (use_vwgt) { 3634 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3635 v_wgt[0] = local_size; 3636 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3637 } 3638 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3639 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3640 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3641 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3642 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3643 3644 /* renumber new_ranks to avoid "holes" in new set of processors */ 3645 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3646 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3647 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3648 if (!redprocs) { 3649 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3650 } else { 3651 PetscInt idxs[1]; 3652 PetscMPIInt tag; 3653 MPI_Request *reqs; 3654 3655 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3656 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3657 for (i=rstart;i<rend;i++) { 3658 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3659 } 3660 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3661 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3662 ierr = PetscFree(reqs);CHKERRQ(ierr); 3663 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3664 } 3665 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3666 /* clean up */ 3667 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3668 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3669 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3670 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3671 } 3672 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3673 3674 /* assemble parallel IS for sends */ 3675 i = 1; 3676 if (color) i=0; 3677 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3678 /* get back IS */ 3679 *is_sends = ranks_send_to; 3680 PetscFunctionReturn(0); 3681 } 3682 3683 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3684 3685 #undef __FUNCT__ 3686 #define __FUNCT__ "MatISSubassemble" 3687 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3688 { 3689 Mat local_mat; 3690 IS is_sends_internal; 3691 PetscInt rows,cols,new_local_rows; 3692 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3693 PetscBool ismatis,isdense,newisdense,destroy_mat; 3694 ISLocalToGlobalMapping l2gmap; 3695 PetscInt* l2gmap_indices; 3696 const PetscInt* is_indices; 3697 MatType new_local_type; 3698 /* buffers */ 3699 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3700 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3701 PetscInt *recv_buffer_idxs_local; 3702 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3703 /* MPI */ 3704 MPI_Comm comm,comm_n; 3705 PetscSubcomm subcomm; 3706 PetscMPIInt n_sends,n_recvs,commsize; 3707 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3708 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3709 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3710 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3711 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3712 PetscErrorCode ierr; 3713 3714 PetscFunctionBegin; 3715 /* TODO: add missing checks */ 3716 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3717 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3718 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3719 PetscValidLogicalCollectiveInt(mat,nis,7); 3720 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3721 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3722 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3723 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3724 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3725 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3726 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3727 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3728 PetscInt mrows,mcols,mnrows,mncols; 3729 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3730 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3731 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3732 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3733 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3734 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3735 } 3736 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3737 PetscValidLogicalCollectiveInt(mat,bs,0); 3738 /* prepare IS for sending if not provided */ 3739 if (!is_sends) { 3740 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3741 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3742 } else { 3743 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3744 is_sends_internal = is_sends; 3745 } 3746 3747 /* get comm */ 3748 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3749 3750 /* compute number of sends */ 3751 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3752 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3753 3754 /* compute number of receives */ 3755 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3756 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3757 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3758 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3759 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3760 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3761 ierr = PetscFree(iflags);CHKERRQ(ierr); 3762 3763 /* restrict comm if requested */ 3764 subcomm = 0; 3765 destroy_mat = PETSC_FALSE; 3766 if (restrict_comm) { 3767 PetscMPIInt color,subcommsize; 3768 3769 color = 0; 3770 if (restrict_full) { 3771 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3772 } else { 3773 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3774 } 3775 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3776 subcommsize = commsize - subcommsize; 3777 /* check if reuse has been requested */ 3778 if (reuse == MAT_REUSE_MATRIX) { 3779 if (*mat_n) { 3780 PetscMPIInt subcommsize2; 3781 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3782 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3783 comm_n = PetscObjectComm((PetscObject)*mat_n); 3784 } else { 3785 comm_n = PETSC_COMM_SELF; 3786 } 3787 } else { /* MAT_INITIAL_MATRIX */ 3788 PetscMPIInt rank; 3789 3790 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3791 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3792 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3793 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3794 comm_n = PetscSubcommChild(subcomm); 3795 } 3796 /* flag to destroy *mat_n if not significative */ 3797 if (color) destroy_mat = PETSC_TRUE; 3798 } else { 3799 comm_n = comm; 3800 } 3801 3802 /* prepare send/receive buffers */ 3803 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3804 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3805 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3806 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3807 if (nis) { 3808 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3809 } 3810 3811 /* Get data from local matrices */ 3812 if (!isdense) { 3813 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3814 /* TODO: See below some guidelines on how to prepare the local buffers */ 3815 /* 3816 send_buffer_vals should contain the raw values of the local matrix 3817 send_buffer_idxs should contain: 3818 - MatType_PRIVATE type 3819 - PetscInt size_of_l2gmap 3820 - PetscInt global_row_indices[size_of_l2gmap] 3821 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3822 */ 3823 } else { 3824 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3825 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3826 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3827 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3828 send_buffer_idxs[1] = i; 3829 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3830 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3831 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3832 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3833 for (i=0;i<n_sends;i++) { 3834 ilengths_vals[is_indices[i]] = len*len; 3835 ilengths_idxs[is_indices[i]] = len+2; 3836 } 3837 } 3838 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3839 /* additional is (if any) */ 3840 if (nis) { 3841 PetscMPIInt psum; 3842 PetscInt j; 3843 for (j=0,psum=0;j<nis;j++) { 3844 PetscInt plen; 3845 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3846 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3847 psum += len+1; /* indices + lenght */ 3848 } 3849 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3850 for (j=0,psum=0;j<nis;j++) { 3851 PetscInt plen; 3852 const PetscInt *is_array_idxs; 3853 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3854 send_buffer_idxs_is[psum] = plen; 3855 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3856 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3857 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3858 psum += plen+1; /* indices + lenght */ 3859 } 3860 for (i=0;i<n_sends;i++) { 3861 ilengths_idxs_is[is_indices[i]] = psum; 3862 } 3863 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3864 } 3865 3866 buf_size_idxs = 0; 3867 buf_size_vals = 0; 3868 buf_size_idxs_is = 0; 3869 for (i=0;i<n_recvs;i++) { 3870 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3871 buf_size_vals += (PetscInt)olengths_vals[i]; 3872 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3873 } 3874 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3875 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3876 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3877 3878 /* get new tags for clean communications */ 3879 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3880 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3881 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3882 3883 /* allocate for requests */ 3884 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3885 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3886 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3887 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3888 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3889 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3890 3891 /* communications */ 3892 ptr_idxs = recv_buffer_idxs; 3893 ptr_vals = recv_buffer_vals; 3894 ptr_idxs_is = recv_buffer_idxs_is; 3895 for (i=0;i<n_recvs;i++) { 3896 source_dest = onodes[i]; 3897 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3898 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3899 ptr_idxs += olengths_idxs[i]; 3900 ptr_vals += olengths_vals[i]; 3901 if (nis) { 3902 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3903 ptr_idxs_is += olengths_idxs_is[i]; 3904 } 3905 } 3906 for (i=0;i<n_sends;i++) { 3907 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3908 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3909 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3910 if (nis) { 3911 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3912 } 3913 } 3914 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3915 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3916 3917 /* assemble new l2g map */ 3918 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3919 ptr_idxs = recv_buffer_idxs; 3920 new_local_rows = 0; 3921 for (i=0;i<n_recvs;i++) { 3922 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3923 ptr_idxs += olengths_idxs[i]; 3924 } 3925 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3926 ptr_idxs = recv_buffer_idxs; 3927 new_local_rows = 0; 3928 for (i=0;i<n_recvs;i++) { 3929 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3930 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3931 ptr_idxs += olengths_idxs[i]; 3932 } 3933 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3934 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3935 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3936 3937 /* infer new local matrix type from received local matrices type */ 3938 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3939 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3940 if (n_recvs) { 3941 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3942 ptr_idxs = recv_buffer_idxs; 3943 for (i=0;i<n_recvs;i++) { 3944 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3945 new_local_type_private = MATAIJ_PRIVATE; 3946 break; 3947 } 3948 ptr_idxs += olengths_idxs[i]; 3949 } 3950 switch (new_local_type_private) { 3951 case MATDENSE_PRIVATE: 3952 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3953 new_local_type = MATSEQAIJ; 3954 bs = 1; 3955 } else { /* if I receive only 1 dense matrix */ 3956 new_local_type = MATSEQDENSE; 3957 bs = 1; 3958 } 3959 break; 3960 case MATAIJ_PRIVATE: 3961 new_local_type = MATSEQAIJ; 3962 bs = 1; 3963 break; 3964 case MATBAIJ_PRIVATE: 3965 new_local_type = MATSEQBAIJ; 3966 break; 3967 case MATSBAIJ_PRIVATE: 3968 new_local_type = MATSEQSBAIJ; 3969 break; 3970 default: 3971 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3972 break; 3973 } 3974 } else { /* by default, new_local_type is seqdense */ 3975 new_local_type = MATSEQDENSE; 3976 bs = 1; 3977 } 3978 3979 /* create MATIS object if needed */ 3980 if (reuse == MAT_INITIAL_MATRIX) { 3981 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3982 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 3983 } else { 3984 /* it also destroys the local matrices */ 3985 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3986 } 3987 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3988 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3989 3990 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3991 3992 /* Global to local map of received indices */ 3993 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3994 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3995 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3996 3997 /* restore attributes -> type of incoming data and its size */ 3998 buf_size_idxs = 0; 3999 for (i=0;i<n_recvs;i++) { 4000 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4001 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4002 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4003 } 4004 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4005 4006 /* set preallocation */ 4007 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4008 if (!newisdense) { 4009 PetscInt *new_local_nnz=0; 4010 4011 ptr_vals = recv_buffer_vals; 4012 ptr_idxs = recv_buffer_idxs_local; 4013 if (n_recvs) { 4014 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4015 } 4016 for (i=0;i<n_recvs;i++) { 4017 PetscInt j; 4018 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4019 for (j=0;j<*(ptr_idxs+1);j++) { 4020 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4021 } 4022 } else { 4023 /* TODO */ 4024 } 4025 ptr_idxs += olengths_idxs[i]; 4026 } 4027 if (new_local_nnz) { 4028 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4029 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4030 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4031 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4032 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4033 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4034 } else { 4035 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4036 } 4037 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4038 } else { 4039 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4040 } 4041 4042 /* set values */ 4043 ptr_vals = recv_buffer_vals; 4044 ptr_idxs = recv_buffer_idxs_local; 4045 for (i=0;i<n_recvs;i++) { 4046 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4047 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4048 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4049 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4050 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4051 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4052 } else { 4053 /* TODO */ 4054 } 4055 ptr_idxs += olengths_idxs[i]; 4056 ptr_vals += olengths_vals[i]; 4057 } 4058 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4059 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4060 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4061 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4062 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4063 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4064 4065 #if 0 4066 if (!restrict_comm) { /* check */ 4067 Vec lvec,rvec; 4068 PetscReal infty_error; 4069 4070 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4071 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4072 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4073 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4074 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4075 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4076 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4077 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4078 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4079 } 4080 #endif 4081 4082 /* assemble new additional is (if any) */ 4083 if (nis) { 4084 PetscInt **temp_idxs,*count_is,j,psum; 4085 4086 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4087 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4088 ptr_idxs = recv_buffer_idxs_is; 4089 psum = 0; 4090 for (i=0;i<n_recvs;i++) { 4091 for (j=0;j<nis;j++) { 4092 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4093 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4094 psum += plen; 4095 ptr_idxs += plen+1; /* shift pointer to received data */ 4096 } 4097 } 4098 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4099 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4100 for (i=1;i<nis;i++) { 4101 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4102 } 4103 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4104 ptr_idxs = recv_buffer_idxs_is; 4105 for (i=0;i<n_recvs;i++) { 4106 for (j=0;j<nis;j++) { 4107 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4108 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4109 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4110 ptr_idxs += plen+1; /* shift pointer to received data */ 4111 } 4112 } 4113 for (i=0;i<nis;i++) { 4114 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4115 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4116 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4117 } 4118 ierr = PetscFree(count_is);CHKERRQ(ierr); 4119 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4120 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4121 } 4122 /* free workspace */ 4123 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4124 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4125 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4126 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4127 if (isdense) { 4128 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4129 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4130 } else { 4131 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4132 } 4133 if (nis) { 4134 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4135 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4136 } 4137 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4138 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4139 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4140 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4141 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4142 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4143 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4144 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4145 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4146 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4147 ierr = PetscFree(onodes);CHKERRQ(ierr); 4148 if (nis) { 4149 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4150 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4151 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4152 } 4153 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4154 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4155 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4156 for (i=0;i<nis;i++) { 4157 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4158 } 4159 *mat_n = NULL; 4160 } 4161 PetscFunctionReturn(0); 4162 } 4163 4164 /* temporary hack into ksp private data structure */ 4165 #include <petsc/private/kspimpl.h> 4166 4167 #undef __FUNCT__ 4168 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4169 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4170 { 4171 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4172 PC_IS *pcis = (PC_IS*)pc->data; 4173 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4174 MatNullSpace CoarseNullSpace=NULL; 4175 ISLocalToGlobalMapping coarse_islg; 4176 IS coarse_is,*isarray; 4177 PetscInt i,im_active=-1,active_procs=-1; 4178 PetscInt nis,nisdofs,nisneu; 4179 PC pc_temp; 4180 PCType coarse_pc_type; 4181 KSPType coarse_ksp_type; 4182 PetscBool multilevel_requested,multilevel_allowed; 4183 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4184 Mat t_coarse_mat_is; 4185 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4186 PetscMPIInt all_procs; 4187 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4188 PetscBool compute_vecs = PETSC_FALSE; 4189 PetscScalar *array; 4190 PetscErrorCode ierr; 4191 4192 PetscFunctionBegin; 4193 /* Assign global numbering to coarse dofs */ 4194 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4195 PetscInt ocoarse_size; 4196 compute_vecs = PETSC_TRUE; 4197 ocoarse_size = pcbddc->coarse_size; 4198 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4199 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4200 /* see if we can avoid some work */ 4201 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4202 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4203 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4204 PC pc; 4205 PetscBool isbddc; 4206 4207 /* temporary workaround since PCBDDC does not have a reset method so far */ 4208 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4209 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4210 if (isbddc) { 4211 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4212 } 4213 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4214 coarse_reuse = PETSC_FALSE; 4215 } else { /* we can safely reuse already computed coarse matrix */ 4216 coarse_reuse = PETSC_TRUE; 4217 } 4218 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4219 coarse_reuse = PETSC_FALSE; 4220 } 4221 /* reset any subassembling information */ 4222 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4223 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4224 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4225 coarse_reuse = PETSC_TRUE; 4226 } 4227 4228 /* count "active" (i.e. with positive local size) and "void" processes */ 4229 im_active = !!(pcis->n); 4230 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4231 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4232 void_procs = all_procs-active_procs; 4233 csin_type_simple = PETSC_TRUE; 4234 redist = PETSC_FALSE; 4235 if (pcbddc->current_level && void_procs) { 4236 csin_ml = PETSC_TRUE; 4237 ncoarse_ml = void_procs; 4238 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4239 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4240 csin_ds = PETSC_TRUE; 4241 ncoarse_ds = pcbddc->redistribute_coarse; 4242 redist = PETSC_TRUE; 4243 } else { 4244 csin_ds = PETSC_TRUE; 4245 ncoarse_ds = active_procs; 4246 redist = PETSC_TRUE; 4247 } 4248 } else { 4249 csin_ml = PETSC_FALSE; 4250 ncoarse_ml = all_procs; 4251 if (void_procs) { 4252 csin_ds = PETSC_TRUE; 4253 ncoarse_ds = void_procs; 4254 csin_type_simple = PETSC_FALSE; 4255 } else { 4256 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4257 csin_ds = PETSC_TRUE; 4258 ncoarse_ds = pcbddc->redistribute_coarse; 4259 redist = PETSC_TRUE; 4260 } else { 4261 csin_ds = PETSC_FALSE; 4262 ncoarse_ds = all_procs; 4263 } 4264 } 4265 } 4266 4267 /* 4268 test if we can go multilevel: three conditions must be satisfied: 4269 - we have not exceeded the number of levels requested 4270 - we can actually subassemble the active processes 4271 - we can find a suitable number of MPI processes where we can place the subassembled problem 4272 */ 4273 multilevel_allowed = PETSC_FALSE; 4274 multilevel_requested = PETSC_FALSE; 4275 if (pcbddc->current_level < pcbddc->max_levels) { 4276 multilevel_requested = PETSC_TRUE; 4277 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4278 multilevel_allowed = PETSC_FALSE; 4279 } else { 4280 multilevel_allowed = PETSC_TRUE; 4281 } 4282 } 4283 /* determine number of process partecipating to coarse solver */ 4284 if (multilevel_allowed) { 4285 ncoarse = ncoarse_ml; 4286 csin = csin_ml; 4287 redist = PETSC_FALSE; 4288 } else { 4289 ncoarse = ncoarse_ds; 4290 csin = csin_ds; 4291 } 4292 4293 /* creates temporary l2gmap and IS for coarse indexes */ 4294 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4295 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4296 4297 /* creates temporary MATIS object for coarse matrix */ 4298 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4299 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4300 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4301 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4302 #if 0 4303 { 4304 PetscViewer viewer; 4305 char filename[256]; 4306 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4307 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4308 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4309 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4310 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4311 } 4312 #endif 4313 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4314 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4315 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4316 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4317 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4318 4319 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4320 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4321 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4322 const PetscInt *idxs; 4323 ISLocalToGlobalMapping tmap; 4324 4325 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4326 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4327 /* allocate space for temporary storage */ 4328 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4329 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4330 /* allocate for IS array */ 4331 nisdofs = pcbddc->n_ISForDofsLocal; 4332 nisneu = !!pcbddc->NeumannBoundariesLocal; 4333 nis = nisdofs + nisneu; 4334 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4335 /* dofs splitting */ 4336 for (i=0;i<nisdofs;i++) { 4337 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4338 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4339 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4340 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4341 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4342 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4343 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4344 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4345 } 4346 /* neumann boundaries */ 4347 if (pcbddc->NeumannBoundariesLocal) { 4348 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4349 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4350 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4351 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4352 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4353 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4354 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4355 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4356 } 4357 /* free memory */ 4358 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4359 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4360 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4361 } else { 4362 nis = 0; 4363 nisdofs = 0; 4364 nisneu = 0; 4365 isarray = NULL; 4366 } 4367 /* destroy no longer needed map */ 4368 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4369 4370 /* restrict on coarse candidates (if needed) */ 4371 coarse_mat_is = NULL; 4372 if (csin) { 4373 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4374 if (redist) { 4375 PetscMPIInt rank; 4376 PetscInt spc,n_spc_p1,dest[1],destsize; 4377 4378 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4379 spc = active_procs/ncoarse; 4380 n_spc_p1 = active_procs%ncoarse; 4381 if (im_active) { 4382 destsize = 1; 4383 if (rank > n_spc_p1*(spc+1)-1) { 4384 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4385 } else { 4386 dest[0] = rank/(spc+1); 4387 } 4388 } else { 4389 destsize = 0; 4390 } 4391 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4392 } else if (csin_type_simple) { 4393 PetscMPIInt rank; 4394 PetscInt issize,isidx; 4395 4396 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4397 if (im_active) { 4398 issize = 1; 4399 isidx = (PetscInt)rank; 4400 } else { 4401 issize = 0; 4402 isidx = -1; 4403 } 4404 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4405 } else { /* get a suitable subassembling pattern from MATIS code */ 4406 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4407 } 4408 4409 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4410 if (!redist || ncoarse <= void_procs) { 4411 PetscInt ncoarse_cand,tissize,*nisindices; 4412 PetscInt *coarse_candidates; 4413 const PetscInt* tisindices; 4414 4415 /* get coarse candidates' ranks in pc communicator */ 4416 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4417 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4418 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4419 if (!coarse_candidates[i]) { 4420 coarse_candidates[ncoarse_cand++]=i; 4421 } 4422 } 4423 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4424 4425 4426 if (pcbddc->dbg_flag) { 4427 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4428 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4429 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4430 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4431 for (i=0;i<ncoarse_cand;i++) { 4432 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4433 } 4434 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4435 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4436 } 4437 /* shift the pattern on coarse candidates */ 4438 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4439 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4440 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4441 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4442 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4443 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4444 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4445 } 4446 if (pcbddc->dbg_flag) { 4447 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4448 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4449 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4450 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4451 } 4452 } 4453 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4454 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4455 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4456 } else { /* this is the last level, so use just receiving processes in subcomm */ 4457 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4458 } 4459 } else { 4460 if (pcbddc->dbg_flag) { 4461 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4462 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4463 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4464 } 4465 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4466 coarse_mat_is = t_coarse_mat_is; 4467 } 4468 4469 /* create local to global scatters for coarse problem */ 4470 if (compute_vecs) { 4471 PetscInt lrows; 4472 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4473 if (coarse_mat_is) { 4474 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4475 } else { 4476 lrows = 0; 4477 } 4478 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4479 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4480 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4481 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4482 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4483 } 4484 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4485 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4486 4487 /* set defaults for coarse KSP and PC */ 4488 if (multilevel_allowed) { 4489 coarse_ksp_type = KSPRICHARDSON; 4490 coarse_pc_type = PCBDDC; 4491 } else { 4492 coarse_ksp_type = KSPPREONLY; 4493 coarse_pc_type = PCREDUNDANT; 4494 } 4495 4496 /* print some info if requested */ 4497 if (pcbddc->dbg_flag) { 4498 if (!multilevel_allowed) { 4499 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4500 if (multilevel_requested) { 4501 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4502 } else if (pcbddc->max_levels) { 4503 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4504 } 4505 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4506 } 4507 } 4508 4509 /* create the coarse KSP object only once with defaults */ 4510 if (coarse_mat_is) { 4511 MatReuse coarse_mat_reuse; 4512 PetscViewer dbg_viewer = NULL; 4513 if (pcbddc->dbg_flag) { 4514 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4515 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4516 } 4517 if (!pcbddc->coarse_ksp) { 4518 char prefix[256],str_level[16]; 4519 size_t len; 4520 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4521 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4522 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4523 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4524 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4525 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4526 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4527 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4528 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4529 /* prefix */ 4530 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4531 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4532 if (!pcbddc->current_level) { 4533 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4534 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4535 } else { 4536 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4537 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4538 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4539 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4540 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4541 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4542 } 4543 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4544 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4545 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4546 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4547 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4548 /* allow user customization */ 4549 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4550 } 4551 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4552 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4553 if (nisdofs) { 4554 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4555 for (i=0;i<nisdofs;i++) { 4556 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4557 } 4558 } 4559 if (nisneu) { 4560 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4561 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4562 } 4563 4564 /* get some info after set from options */ 4565 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4566 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4567 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4568 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4569 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4570 isbddc = PETSC_FALSE; 4571 } 4572 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4573 if (isredundant) { 4574 KSP inner_ksp; 4575 PC inner_pc; 4576 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4577 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4578 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4579 } 4580 4581 /* assemble coarse matrix */ 4582 if (coarse_reuse) { 4583 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4584 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4585 coarse_mat_reuse = MAT_REUSE_MATRIX; 4586 } else { 4587 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4588 } 4589 if (isbddc || isnn) { 4590 if (pcbddc->coarsening_ratio > 1) { 4591 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4592 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4593 if (pcbddc->dbg_flag) { 4594 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4595 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4596 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4597 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4598 } 4599 } 4600 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4601 } else { 4602 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4603 coarse_mat = coarse_mat_is; 4604 } 4605 } else { 4606 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4607 } 4608 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4609 4610 /* propagate symmetry info of coarse matrix */ 4611 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4612 if (pc->pmat->symmetric_set) { 4613 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4614 } 4615 if (pc->pmat->hermitian_set) { 4616 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4617 } 4618 if (pc->pmat->spd_set) { 4619 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4620 } 4621 /* set operators */ 4622 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4623 if (pcbddc->dbg_flag) { 4624 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4625 } 4626 } else { /* processes non partecipating to coarse solver (if any) */ 4627 coarse_mat = 0; 4628 } 4629 ierr = PetscFree(isarray);CHKERRQ(ierr); 4630 #if 0 4631 { 4632 PetscViewer viewer; 4633 char filename[256]; 4634 sprintf(filename,"coarse_mat.m"); 4635 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4636 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4637 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4638 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4639 } 4640 #endif 4641 4642 /* Compute coarse null space (special handling by BDDC only) */ 4643 #if 0 4644 if (pcbddc->NullSpace) { 4645 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4646 } 4647 #endif 4648 4649 if (pcbddc->coarse_ksp) { 4650 Vec crhs,csol; 4651 PetscBool ispreonly; 4652 4653 if (CoarseNullSpace) { 4654 if (isbddc) { 4655 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4656 } else { 4657 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4658 } 4659 } 4660 /* setup coarse ksp */ 4661 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4662 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4663 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4664 /* hack */ 4665 if (!csol) { 4666 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4667 } 4668 if (!crhs) { 4669 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4670 } 4671 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4672 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4673 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4674 KSP check_ksp; 4675 KSPType check_ksp_type; 4676 PC check_pc; 4677 Vec check_vec,coarse_vec; 4678 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4679 PetscInt its; 4680 PetscBool compute_eigs; 4681 PetscReal *eigs_r,*eigs_c; 4682 PetscInt neigs; 4683 const char *prefix; 4684 4685 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4686 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4687 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4688 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4689 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4690 if (ispreonly) { 4691 check_ksp_type = KSPPREONLY; 4692 compute_eigs = PETSC_FALSE; 4693 } else { 4694 check_ksp_type = KSPGMRES; 4695 compute_eigs = PETSC_TRUE; 4696 } 4697 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4698 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4699 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4700 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4701 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4702 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4703 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4704 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4705 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4706 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4707 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4708 /* create random vec */ 4709 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4710 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4711 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4712 if (CoarseNullSpace) { 4713 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4714 } 4715 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4716 /* solve coarse problem */ 4717 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4718 if (CoarseNullSpace) { 4719 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4720 } 4721 /* set eigenvalue estimation if preonly has not been requested */ 4722 if (compute_eigs) { 4723 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4724 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4725 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4726 lambda_max = eigs_r[neigs-1]; 4727 lambda_min = eigs_r[0]; 4728 if (pcbddc->use_coarse_estimates) { 4729 if (lambda_max>lambda_min) { 4730 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4731 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4732 } 4733 } 4734 } 4735 4736 /* check coarse problem residual error */ 4737 if (pcbddc->dbg_flag) { 4738 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4739 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4740 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4741 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4742 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4743 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4744 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4745 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4746 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4747 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4748 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4749 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4750 if (compute_eigs) { 4751 PetscReal lambda_max_s,lambda_min_s; 4752 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4753 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4754 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4755 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4756 for (i=0;i<neigs;i++) { 4757 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4758 } 4759 } 4760 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4761 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4762 } 4763 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4764 if (compute_eigs) { 4765 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4766 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4767 } 4768 } 4769 } 4770 /* print additional info */ 4771 if (pcbddc->dbg_flag) { 4772 /* waits until all processes reaches this point */ 4773 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4774 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4775 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4776 } 4777 4778 /* free memory */ 4779 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4780 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4781 PetscFunctionReturn(0); 4782 } 4783 4784 #undef __FUNCT__ 4785 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4786 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4787 { 4788 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4789 PC_IS* pcis = (PC_IS*)pc->data; 4790 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4791 IS subset,subset_mult,subset_n; 4792 PetscInt local_size,coarse_size=0; 4793 PetscInt *local_primal_indices=NULL; 4794 const PetscInt *t_local_primal_indices; 4795 PetscErrorCode ierr; 4796 4797 PetscFunctionBegin; 4798 /* Compute global number of coarse dofs */ 4799 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4800 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4801 } 4802 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4803 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4804 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4805 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4806 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4807 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4808 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4809 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4810 if (local_size != pcbddc->local_primal_size) { 4811 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4812 } 4813 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4814 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4815 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4816 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4817 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4818 4819 /* check numbering */ 4820 if (pcbddc->dbg_flag) { 4821 PetscScalar coarsesum,*array,*array2; 4822 PetscInt i; 4823 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4824 4825 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4826 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4827 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4828 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4829 /* counter */ 4830 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4831 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4832 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4833 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4834 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4835 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4836 4837 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4838 for (i=0;i<pcbddc->local_primal_size;i++) { 4839 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4840 } 4841 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4842 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4843 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4844 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4845 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4846 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4847 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4848 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4849 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4850 for (i=0;i<pcis->n;i++) { 4851 if (array[i] != 0.0 && array[i] != array2[i]) { 4852 PetscInt owned = (PetscInt)(array[i]); 4853 PetscInt neigh = (PetscInt)(array2[i]); 4854 set_error = PETSC_TRUE; 4855 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 4856 } 4857 } 4858 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4859 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4860 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4861 for (i=0;i<pcis->n;i++) { 4862 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4863 } 4864 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4865 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4866 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4867 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4868 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4869 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4870 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4871 PetscInt *gidxs; 4872 4873 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4874 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4875 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4876 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4878 for (i=0;i<pcbddc->local_primal_size;i++) { 4879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4880 } 4881 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4882 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4883 } 4884 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4885 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4886 } 4887 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4888 /* get back data */ 4889 *coarse_size_n = coarse_size; 4890 *local_primal_indices_n = local_primal_indices; 4891 PetscFunctionReturn(0); 4892 } 4893 4894 #undef __FUNCT__ 4895 #define __FUNCT__ "PCBDDCGlobalToLocal" 4896 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4897 { 4898 IS localis_t; 4899 PetscInt i,lsize,*idxs,n; 4900 PetscScalar *vals; 4901 PetscErrorCode ierr; 4902 4903 PetscFunctionBegin; 4904 /* get indices in local ordering exploiting local to global map */ 4905 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4906 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4907 for (i=0;i<lsize;i++) vals[i] = 1.0; 4908 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4909 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4910 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4911 if (idxs) { /* multilevel guard */ 4912 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4913 } 4914 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4915 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4916 ierr = PetscFree(vals);CHKERRQ(ierr); 4917 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4918 /* now compute set in local ordering */ 4919 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4920 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4921 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4922 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4923 for (i=0,lsize=0;i<n;i++) { 4924 if (PetscRealPart(vals[i]) > 0.5) { 4925 lsize++; 4926 } 4927 } 4928 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4929 for (i=0,lsize=0;i<n;i++) { 4930 if (PetscRealPart(vals[i]) > 0.5) { 4931 idxs[lsize++] = i; 4932 } 4933 } 4934 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4935 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4936 *localis = localis_t; 4937 PetscFunctionReturn(0); 4938 } 4939 4940 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4941 #undef __FUNCT__ 4942 #define __FUNCT__ "PCBDDCMatMult_Private" 4943 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4944 { 4945 PCBDDCChange_ctx change_ctx; 4946 PetscErrorCode ierr; 4947 4948 PetscFunctionBegin; 4949 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4950 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4951 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4952 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4953 PetscFunctionReturn(0); 4954 } 4955 4956 #undef __FUNCT__ 4957 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4958 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4959 { 4960 PCBDDCChange_ctx change_ctx; 4961 PetscErrorCode ierr; 4962 4963 PetscFunctionBegin; 4964 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4965 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4966 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4967 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4968 PetscFunctionReturn(0); 4969 } 4970 4971 #undef __FUNCT__ 4972 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4973 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4974 { 4975 PC_IS *pcis=(PC_IS*)pc->data; 4976 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4977 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4978 Mat S_j; 4979 PetscInt *used_xadj,*used_adjncy; 4980 PetscBool free_used_adj; 4981 PetscErrorCode ierr; 4982 4983 PetscFunctionBegin; 4984 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4985 free_used_adj = PETSC_FALSE; 4986 if (pcbddc->sub_schurs_layers == -1) { 4987 used_xadj = NULL; 4988 used_adjncy = NULL; 4989 } else { 4990 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4991 used_xadj = pcbddc->mat_graph->xadj; 4992 used_adjncy = pcbddc->mat_graph->adjncy; 4993 } else if (pcbddc->computed_rowadj) { 4994 used_xadj = pcbddc->mat_graph->xadj; 4995 used_adjncy = pcbddc->mat_graph->adjncy; 4996 } else { 4997 PetscBool flg_row=PETSC_FALSE; 4998 const PetscInt *xadj,*adjncy; 4999 PetscInt nvtxs; 5000 5001 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5002 if (flg_row) { 5003 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5004 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5005 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5006 free_used_adj = PETSC_TRUE; 5007 } else { 5008 pcbddc->sub_schurs_layers = -1; 5009 used_xadj = NULL; 5010 used_adjncy = NULL; 5011 } 5012 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5013 } 5014 } 5015 5016 /* setup sub_schurs data */ 5017 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5018 if (!sub_schurs->use_mumps) { 5019 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5020 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5021 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5022 } else { 5023 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5024 PetscBool isseqaij; 5025 if (!pcbddc->use_vertices && reuse_solvers) { 5026 PetscInt n_vertices; 5027 5028 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5029 reuse_solvers = (PetscBool)!n_vertices; 5030 } 5031 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5032 if (!isseqaij) { 5033 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5034 if (matis->A == pcbddc->local_mat) { 5035 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5036 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5037 } else { 5038 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5039 } 5040 } 5041 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5042 } 5043 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5044 5045 /* free adjacency */ 5046 if (free_used_adj) { 5047 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5048 } 5049 PetscFunctionReturn(0); 5050 } 5051 5052 #undef __FUNCT__ 5053 #define __FUNCT__ "PCBDDCInitSubSchurs" 5054 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5055 { 5056 PC_IS *pcis=(PC_IS*)pc->data; 5057 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5058 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5059 PCBDDCGraph graph; 5060 PetscErrorCode ierr; 5061 5062 PetscFunctionBegin; 5063 /* attach interface graph for determining subsets */ 5064 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5065 IS verticesIS,verticescomm; 5066 PetscInt vsize,*idxs; 5067 5068 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5069 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5070 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5071 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5072 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5073 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5074 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5075 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5076 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5077 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5078 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5079 /* 5080 if (pcbddc->dbg_flag) { 5081 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5082 } 5083 */ 5084 } else { 5085 graph = pcbddc->mat_graph; 5086 } 5087 5088 /* sub_schurs init */ 5089 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5090 5091 /* free graph struct */ 5092 if (pcbddc->sub_schurs_rebuild) { 5093 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5094 } 5095 PetscFunctionReturn(0); 5096 } 5097