1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 60 nmax = PetscMax(nmin,nmax); 61 allocated_S_St = PETSC_FALSE; 62 if (nmin) { 63 allocated_S_St = PETSC_TRUE; 64 } 65 66 /* allocate lapack workspace */ 67 cum = cum2 = 0; 68 maxneigs = 0; 69 for (i=0;i<sub_schurs->n_subs;i++) { 70 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 71 PetscInt n,subset_size; 72 73 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 74 n = PetscMin(subset_size,nmax); 75 cum += subset_size; 76 cum2 += subset_size*n; 77 maxneigs = PetscMax(maxneigs,n); 78 } 79 } 80 if (mss) { 81 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 82 PetscBLASInt B_itype = 1; 83 PetscBLASInt B_N = mss; 84 PetscReal zero = 0.0; 85 PetscReal eps = 0.0; /* dlamch? */ 86 87 B_lwork = -1; 88 S = NULL; 89 St = NULL; 90 eigs = NULL; 91 eigv = NULL; 92 B_iwork = NULL; 93 B_ifail = NULL; 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 PetscInt j,subset_size; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 158 const PetscInt *idxs; 159 PetscReal infty = PETSC_MAX_REAL; 160 PetscInt eigs_start = 0; 161 PetscBLASInt B_N; 162 163 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 164 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 165 if (sub_schurs->is_hermitian) { 166 PetscInt j; 167 for (j=0;j<subset_size;j++) { 168 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 /* Threshold: this is an heuristic for edges */ 183 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 184 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 185 186 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 187 PetscBLASInt B_itype = 1; 188 PetscBLASInt B_IL, B_IU; 189 PetscReal eps = -1.0; /* dlamch? */ 190 PetscInt nmin_s; 191 192 /* ask for eigenvalues larger than thresh */ 193 if (pcbddc->dbg_flag) { 194 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 195 } 196 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 197 #if defined(PETSC_USE_COMPLEX) 198 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 199 #else 200 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 201 #endif 202 ierr = PetscFPTrapPop();CHKERRQ(ierr); 203 if (B_ierr) { 204 if (B_ierr < 0 ) { 205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 206 } else if (B_ierr <= B_N) { 207 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 208 } else { 209 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 210 } 211 } 212 213 if (B_neigs > nmax) { 214 if (pcbddc->dbg_flag) { 215 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 216 } 217 eigs_start = B_neigs -nmax; 218 B_neigs = nmax; 219 } 220 221 nmin_s = PetscMin(nmin,B_N); 222 if (B_neigs < nmin_s) { 223 PetscBLASInt B_neigs2; 224 225 B_IU = B_N - B_neigs; 226 B_IL = B_N - nmin_s + 1; 227 if (pcbddc->dbg_flag) { 228 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 229 } 230 if (sub_schurs->is_hermitian) { 231 PetscInt j; 232 for (j=0;j<subset_size;j++) { 233 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 234 } 235 for (j=0;j<subset_size;j++) { 236 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 } else { 239 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 240 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 241 } 242 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 243 #if defined(PETSC_USE_COMPLEX) 244 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 245 #else 246 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 247 #endif 248 ierr = PetscFPTrapPop();CHKERRQ(ierr); 249 B_neigs += B_neigs2; 250 } 251 if (B_ierr) { 252 if (B_ierr < 0 ) { 253 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 254 } else if (B_ierr <= B_N) { 255 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 256 } else { 257 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 258 } 259 } 260 if (pcbddc->dbg_flag) { 261 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 262 for (j=0;j<B_neigs;j++) { 263 if (eigs[j] == 0.0) { 264 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 265 } else { 266 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 267 } 268 } 269 } 270 } else { 271 /* TODO */ 272 } 273 274 maxneigs = PetscMax(B_neigs,maxneigs); 275 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 276 if (B_neigs) { 277 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 278 279 if (pcbddc->dbg_flag > 1) { 280 PetscInt ii; 281 for (ii=0;ii<B_neigs;ii++) { 282 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 283 for (j=0;j<B_N;j++) { 284 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 } 286 } 287 } 288 #if 0 289 for (j=0;j<B_neigs;j++) { 290 PetscBLASInt Blas_N,Blas_one = 1.0; 291 PetscScalar norm; 292 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 293 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 294 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 295 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 296 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 297 } else { 298 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 299 } 300 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 301 } 302 #endif 303 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 304 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 305 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 306 cum++; 307 } 308 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 309 } 310 /* shift for next computation */ 311 cumarray += subset_size*subset_size; 312 } 313 if (pcbddc->dbg_flag) { 314 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 315 } 316 317 if (mss) { 318 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 319 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 320 /* destroy matrices (junk) */ 321 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 322 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 323 } 324 if (allocated_S_St) { 325 ierr = PetscFree2(S,St);CHKERRQ(ierr); 326 } 327 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 328 #if defined(PETSC_USE_COMPLEX) 329 ierr = PetscFree(rwork);CHKERRQ(ierr); 330 #endif 331 if (pcbddc->dbg_flag) { 332 PetscInt maxneigs_r; 333 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 334 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 335 } 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "PCBDDCSetUpSolvers" 341 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 342 { 343 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 344 PetscScalar *coarse_submat_vals; 345 PetscErrorCode ierr; 346 347 PetscFunctionBegin; 348 /* Setup local scatters R_to_B and (optionally) R_to_D */ 349 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 350 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 351 352 /* Setup local neumann solver ksp_R */ 353 /* PCBDDCSetUpLocalScatters should be called first! */ 354 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 355 356 /* Change global null space passed in by the user if change of basis has been requested */ 357 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 358 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 359 } 360 361 /* 362 Setup local correction and local part of coarse basis. 363 Gives back the dense local part of the coarse matrix in column major ordering 364 */ 365 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 366 367 /* Compute total number of coarse nodes and setup coarse solver */ 368 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 369 370 /* free */ 371 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "PCBDDCResetCustomization" 377 PetscErrorCode PCBDDCResetCustomization(PC pc) 378 { 379 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 384 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 385 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 386 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 387 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 388 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 389 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 390 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 392 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 393 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 #undef __FUNCT__ 398 #define __FUNCT__ "PCBDDCResetTopography" 399 PetscErrorCode PCBDDCResetTopography(PC pc) 400 { 401 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 402 PetscErrorCode ierr; 403 404 PetscFunctionBegin; 405 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 406 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 407 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 408 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 409 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetSolvers" 415 PetscErrorCode PCBDDCResetSolvers(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscScalar *array; 419 PetscErrorCode ierr; 420 421 PetscFunctionBegin; 422 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 423 if (pcbddc->coarse_phi_B) { 424 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 425 ierr = PetscFree(array);CHKERRQ(ierr); 426 } 427 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 428 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 429 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 430 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 431 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 432 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 433 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 434 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 436 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 437 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 438 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 439 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 440 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 441 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 442 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 443 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 445 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 446 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 447 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 448 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 449 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 450 PetscFunctionReturn(0); 451 } 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 455 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 456 { 457 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 458 PC_IS *pcis = (PC_IS*)pc->data; 459 VecType impVecType; 460 PetscInt n_constraints,n_R,old_size; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 if (!pcbddc->ConstraintMatrix) { 465 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 466 } 467 /* get sizes */ 468 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 469 n_R = pcis->n-pcbddc->n_vertices; 470 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 471 /* local work vectors (try to avoid unneeded work)*/ 472 /* R nodes */ 473 old_size = -1; 474 if (pcbddc->vec1_R) { 475 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 476 } 477 if (n_R != old_size) { 478 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 479 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 480 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 481 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 482 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 483 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 484 } 485 /* local primal dofs */ 486 old_size = -1; 487 if (pcbddc->vec1_P) { 488 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 489 } 490 if (pcbddc->local_primal_size != old_size) { 491 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 492 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 493 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 494 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 495 } 496 /* local explicit constraints */ 497 old_size = -1; 498 if (pcbddc->vec1_C) { 499 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 500 } 501 if (n_constraints && n_constraints != old_size) { 502 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 503 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 504 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 505 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 506 } 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "PCBDDCSetUpCorrection" 512 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 513 { 514 PetscErrorCode ierr; 515 /* pointers to pcis and pcbddc */ 516 PC_IS* pcis = (PC_IS*)pc->data; 517 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 518 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 519 /* submatrices of local problem */ 520 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 521 /* submatrices of local coarse problem */ 522 Mat S_VV,S_CV,S_VC,S_CC; 523 /* working matrices */ 524 Mat C_CR; 525 /* additional working stuff */ 526 PC pc_R; 527 Mat F; 528 PetscBool isLU,isCHOL,isILU; 529 530 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 531 PetscScalar *work; 532 PetscInt *idx_V_B; 533 PetscInt n,n_vertices,n_constraints; 534 PetscInt i,n_R,n_D,n_B; 535 PetscBool unsymmetric_check; 536 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 537 MatType impMatType; 538 /* some shortcuts to scalars */ 539 PetscScalar one=1.0,m_one=-1.0; 540 541 PetscFunctionBegin; 542 n_vertices = pcbddc->n_vertices; 543 n_constraints = pcbddc->local_primal_size-n_vertices; 544 /* Set Non-overlapping dimensions */ 545 n_B = pcis->n_B; 546 n_D = pcis->n - n_B; 547 n_R = pcis->n - n_vertices; 548 549 /* Set types for local objects needed by BDDC precondtioner */ 550 impMatType = MATSEQDENSE; 551 552 /* vertices in boundary numbering */ 553 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 554 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 555 if (i != n_vertices) { 556 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 557 } 558 559 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 560 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 564 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 565 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 566 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 568 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 569 570 unsymmetric_check = PETSC_FALSE; 571 /* allocate workspace */ 572 n = 0; 573 if (n_constraints) { 574 n += n_R*n_constraints; 575 } 576 if (n_vertices) { 577 n = PetscMax(2*n_R*n_vertices,n); 578 n = PetscMax((n_R+n_B)*n_vertices,n); 579 } 580 if (!pcbddc->issym) { 581 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 582 unsymmetric_check = PETSC_TRUE; 583 } 584 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 585 586 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 587 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 588 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 589 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 590 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 591 if (isLU || isILU || isCHOL) { 592 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 593 } else if (sub_schurs->reuse_mumps) { 594 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 595 MatFactorType type; 596 597 F = reuse_mumps->F; 598 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 599 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 600 } else { 601 F = NULL; 602 } 603 604 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 605 if (n_constraints) { 606 Mat M1,M2,M3; 607 Mat auxmat; 608 IS is_aux; 609 PetscScalar *array,*array2; 610 611 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 612 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 613 614 /* Extract constraints on R nodes: C_{CR} */ 615 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 616 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 617 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 618 619 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 620 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 621 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 622 for (i=0;i<n_constraints;i++) { 623 const PetscScalar *row_cmat_values; 624 const PetscInt *row_cmat_indices; 625 PetscInt size_of_constraint,j; 626 627 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 628 for (j=0;j<size_of_constraint;j++) { 629 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 630 } 631 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 632 } 633 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 634 if (F) { 635 Mat B; 636 637 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 638 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 639 ierr = MatDestroy(&B);CHKERRQ(ierr); 640 } else { 641 PetscScalar *marr; 642 643 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 644 for (i=0;i<n_constraints;i++) { 645 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 646 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 647 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 648 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 649 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 650 } 651 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 652 } 653 if (!pcbddc->switch_static) { 654 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 655 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 656 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 657 for (i=0;i<n_constraints;i++) { 658 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 659 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 660 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 661 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 662 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 663 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 664 } 665 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 666 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 667 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 668 } else { 669 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 670 pcbddc->local_auxmat2 = local_auxmat2_R; 671 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 672 } 673 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 674 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 675 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 676 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 677 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 678 if (isCHOL) { 679 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 680 } else { 681 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 682 } 683 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 684 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 685 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 686 ierr = MatDestroy(&M2);CHKERRQ(ierr); 687 ierr = MatDestroy(&M3);CHKERRQ(ierr); 688 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 689 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 690 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 691 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 692 ierr = MatDestroy(&M1);CHKERRQ(ierr); 693 } 694 /* Get submatrices from subdomain matrix */ 695 if (n_vertices) { 696 Mat newmat; 697 IS is_aux; 698 PetscInt ibs,mbs; 699 PetscBool issbaij; 700 701 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 702 IS tis; 703 704 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 705 ierr = ISSort(tis);CHKERRQ(ierr); 706 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 707 ierr = ISDestroy(&tis);CHKERRQ(ierr); 708 } else { 709 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 710 } 711 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 712 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 713 if (ibs != mbs) { /* need to convert to SEQAIJ */ 714 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 715 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 716 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 717 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 718 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 719 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 720 } else { 721 /* this is safe */ 722 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 723 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 724 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 725 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 726 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 727 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 728 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 729 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 730 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 731 } else { 732 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 733 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 734 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 735 } 736 /* need to convert to SeqAIJ for MatMatMult with dense matrix */ 737 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 738 } 739 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 740 } 741 742 /* Matrix of coarse basis functions (local) */ 743 if (pcbddc->coarse_phi_B) { 744 PetscInt on_B,on_primal,on_D=n_D; 745 if (pcbddc->coarse_phi_D) { 746 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 747 } 748 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 749 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 750 PetscScalar *marray; 751 752 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 753 ierr = PetscFree(marray);CHKERRQ(ierr); 754 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 755 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 756 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 757 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 758 } 759 } 760 761 if (!pcbddc->coarse_phi_B) { 762 PetscScalar *marray; 763 764 n = n_B*pcbddc->local_primal_size; 765 if (pcbddc->switch_static || pcbddc->dbg_flag) { 766 n += n_D*pcbddc->local_primal_size; 767 } 768 if (!pcbddc->issym) { 769 n *= 2; 770 } 771 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 772 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 773 n = n_B*pcbddc->local_primal_size; 774 if (pcbddc->switch_static || pcbddc->dbg_flag) { 775 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 776 n += n_D*pcbddc->local_primal_size; 777 } 778 if (!pcbddc->issym) { 779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 780 if (pcbddc->switch_static || pcbddc->dbg_flag) { 781 n = n_B*pcbddc->local_primal_size; 782 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 783 } 784 } else { 785 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 786 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 787 if (pcbddc->switch_static || pcbddc->dbg_flag) { 788 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 789 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 790 } 791 } 792 } 793 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 794 /* vertices */ 795 if (n_vertices) { 796 797 if (n_R) { 798 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 799 PetscBLASInt B_N,B_one = 1; 800 PetscScalar *x,*y; 801 802 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 803 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 804 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 805 if (F) { /* TODO could be optimized for symmetric problems */ 806 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 807 } else { 808 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 809 for (i=0;i<n_vertices;i++) { 810 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 811 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 812 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 813 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 814 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 815 } 816 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 817 } 818 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 819 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 820 if (n_constraints) { 821 Mat B; 822 823 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 824 for (i=0;i<n_vertices;i++) { 825 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 826 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 827 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 829 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 830 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 831 } 832 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 833 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 834 ierr = MatDestroy(&B);CHKERRQ(ierr); 835 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 836 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 837 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 838 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 839 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 840 ierr = MatDestroy(&B);CHKERRQ(ierr); 841 } 842 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 843 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 844 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 845 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 846 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 847 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 848 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 849 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 850 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 851 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 852 } else { 853 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 854 } 855 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 856 /* coarse basis functions */ 857 for (i=0;i<n_vertices;i++) { 858 PetscScalar *y; 859 860 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 861 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 862 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 863 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 864 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 865 y[n_B*i+idx_V_B[i]] = 1.0; 866 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 867 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 868 869 if (pcbddc->switch_static || pcbddc->dbg_flag) { 870 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 871 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 872 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 875 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 876 } 877 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 878 } 879 } 880 881 if (n_constraints) { 882 Mat B; 883 884 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 885 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 886 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 887 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 888 if (n_vertices) { 889 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 890 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 891 } else { 892 Mat S_VCt; 893 894 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 895 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 896 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 897 } 898 } 899 ierr = MatDestroy(&B);CHKERRQ(ierr); 900 /* coarse basis functions */ 901 for (i=0;i<n_constraints;i++) { 902 PetscScalar *y; 903 904 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 905 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 906 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 907 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 910 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 911 if (pcbddc->switch_static || pcbddc->dbg_flag) { 912 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 913 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 914 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 917 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 918 } 919 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 920 } 921 } 922 if (n_constraints) { 923 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 924 } 925 926 /* compute other basis functions for non-symmetric problems */ 927 if (!pcbddc->issym) { 928 929 if (n_constraints) { 930 Mat S_CCT,B_C; 931 932 /* this is a lazy thing */ 933 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 934 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 935 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 936 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 937 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 938 if (n_vertices) { 939 Mat B_V,S_VCT; 940 941 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 942 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 943 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 944 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 945 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 946 } 947 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 948 } else { /* if there are no constraints, reset work */ 949 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 950 } 951 if (n_vertices && n_R) { 952 Mat A_VRT; 953 PetscScalar *marray; 954 PetscBLASInt B_N,B_one = 1; 955 956 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 957 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 958 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 959 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 960 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 961 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 962 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 963 } 964 965 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 966 for (i=0;i<pcbddc->local_primal_size;i++) { 967 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 968 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 969 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 970 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 971 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 972 } 973 } else { 974 for (i=0;i<pcbddc->local_primal_size;i++) { 975 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 976 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 977 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 978 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 979 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 980 } 981 } 982 /* coarse basis functions */ 983 for (i=0;i<pcbddc->local_primal_size;i++) { 984 PetscScalar *y; 985 986 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 987 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 988 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 989 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 if (i<n_vertices) { 992 y[n_B*i+idx_V_B[i]] = 1.0; 993 } 994 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 995 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 996 997 if (pcbddc->switch_static || pcbddc->dbg_flag) { 998 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 999 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1003 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1004 } 1005 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1006 } 1007 } 1008 /* free memory */ 1009 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1010 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1011 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1012 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1013 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1014 ierr = PetscFree(work);CHKERRQ(ierr); 1015 if (n_vertices) { 1016 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1017 } 1018 if (n_constraints) { 1019 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1020 } 1021 /* Checking coarse_sub_mat and coarse basis functios */ 1022 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1023 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1024 if (pcbddc->dbg_flag) { 1025 Mat coarse_sub_mat; 1026 Mat AUXMAT,TM1,TM2,TM3,TM4; 1027 Mat coarse_phi_D,coarse_phi_B; 1028 Mat coarse_psi_D,coarse_psi_B; 1029 Mat A_II,A_BB,A_IB,A_BI; 1030 Mat C_B,CPHI; 1031 IS is_dummy; 1032 Vec mones; 1033 MatType checkmattype=MATSEQAIJ; 1034 PetscReal real_value; 1035 1036 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1037 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1038 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1039 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1040 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1041 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1042 if (unsymmetric_check) { 1043 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1044 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1045 } 1046 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1047 1048 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1049 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 1050 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1051 if (unsymmetric_check) { 1052 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1053 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1054 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1059 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1060 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1061 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1062 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1063 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1064 } else { 1065 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1066 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1067 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1069 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1070 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1071 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1072 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1073 } 1074 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1075 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1077 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1078 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1079 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1081 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1082 1083 /* check constraints */ 1084 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1085 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1086 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1087 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1088 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1089 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1090 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1092 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1093 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1094 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1095 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1096 1097 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1098 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1099 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1100 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1101 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1102 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1103 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1104 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1105 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1106 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1107 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1108 if (unsymmetric_check) { 1109 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1110 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1111 } 1112 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1113 } 1114 /* get back data */ 1115 *coarse_submat_vals_n = coarse_submat_vals; 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1121 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1122 { 1123 Mat *work_mat; 1124 IS isrow_s,iscol_s; 1125 PetscBool rsorted,csorted; 1126 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1131 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1132 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1133 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1134 1135 if (!rsorted) { 1136 const PetscInt *idxs; 1137 PetscInt *idxs_sorted,i; 1138 1139 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1141 for (i=0;i<rsize;i++) { 1142 idxs_perm_r[i] = i; 1143 } 1144 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1145 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1146 for (i=0;i<rsize;i++) { 1147 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1148 } 1149 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1150 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1151 } else { 1152 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1153 isrow_s = isrow; 1154 } 1155 1156 if (!csorted) { 1157 if (isrow == iscol) { 1158 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1159 iscol_s = isrow_s; 1160 } else { 1161 const PetscInt *idxs; 1162 PetscInt *idxs_sorted,i; 1163 1164 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1165 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1166 for (i=0;i<csize;i++) { 1167 idxs_perm_c[i] = i; 1168 } 1169 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1170 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1171 for (i=0;i<csize;i++) { 1172 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1173 } 1174 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1175 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1176 } 1177 } else { 1178 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1179 iscol_s = iscol; 1180 } 1181 1182 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1183 1184 if (!rsorted || !csorted) { 1185 Mat new_mat; 1186 IS is_perm_r,is_perm_c; 1187 1188 if (!rsorted) { 1189 PetscInt *idxs_r,i; 1190 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1191 for (i=0;i<rsize;i++) { 1192 idxs_r[idxs_perm_r[i]] = i; 1193 } 1194 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1195 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1196 } else { 1197 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1198 } 1199 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1200 1201 if (!csorted) { 1202 if (isrow_s == iscol_s) { 1203 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1204 is_perm_c = is_perm_r; 1205 } else { 1206 PetscInt *idxs_c,i; 1207 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1208 for (i=0;i<csize;i++) { 1209 idxs_c[idxs_perm_c[i]] = i; 1210 } 1211 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1212 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1213 } 1214 } else { 1215 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1216 } 1217 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1218 1219 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1220 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1221 work_mat[0] = new_mat; 1222 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1223 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1224 } 1225 1226 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1227 *B = work_mat[0]; 1228 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1229 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1230 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1236 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1237 { 1238 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1239 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1240 Mat new_mat; 1241 IS is_local,is_global; 1242 PetscInt local_size; 1243 PetscBool isseqaij; 1244 PetscErrorCode ierr; 1245 1246 PetscFunctionBegin; 1247 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1248 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1249 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1250 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1251 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1252 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1253 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1254 1255 /* check */ 1256 if (pcbddc->dbg_flag) { 1257 Vec x,x_change; 1258 PetscReal error; 1259 1260 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1261 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1262 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1263 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1266 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1267 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1268 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1269 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1270 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1271 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1272 ierr = VecDestroy(&x);CHKERRQ(ierr); 1273 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1274 } 1275 1276 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1277 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1278 if (isseqaij) { 1279 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1280 } else { 1281 Mat work_mat; 1282 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1283 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1284 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1285 } 1286 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1287 #if !defined(PETSC_USE_COMPLEX) 1288 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1289 #endif 1290 /* 1291 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1292 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1293 */ 1294 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1300 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1301 { 1302 PC_IS* pcis = (PC_IS*)(pc->data); 1303 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1304 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1305 IS is_B_reuse_mumps=NULL; 1306 PetscInt *idx_R_local; 1307 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1308 PetscInt vbs,bs; 1309 PetscBT bitmask=NULL; 1310 PetscErrorCode ierr; 1311 1312 PetscFunctionBegin; 1313 /* 1314 No need to setup local scatters if 1315 - primal space is unchanged 1316 AND 1317 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1318 AND 1319 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1320 */ 1321 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1322 PetscFunctionReturn(0); 1323 } 1324 /* destroy old objects */ 1325 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1326 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1327 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1328 /* Set Non-overlapping dimensions */ 1329 n_B = pcis->n_B; 1330 n_D = pcis->n - n_B; 1331 n_vertices = pcbddc->n_vertices; 1332 1333 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1334 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1335 1336 /* create auxiliary bitmask */ 1337 if (!sub_schurs->reuse_mumps) { 1338 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1339 for (i=0;i<n_vertices;i++) { 1340 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1341 } 1342 1343 for (i=0, n_R=0; i<pcis->n; i++) { 1344 if (!PetscBTLookup(bitmask,i)) { 1345 idx_R_local[n_R++] = i; 1346 } 1347 } 1348 } else { /* A different ordering and auxiliary scatters are needed we are reusing MUMPS Schur solver */ 1349 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1350 const PetscInt *idxs; 1351 PetscInt schur_size; 1352 IS list[2],tisB_N,tisR; 1353 1354 if (sub_schurs->is_dir) { 1355 list[0] = sub_schurs->is_Ej_all; 1356 list[1] = sub_schurs->is_dir; 1357 ierr = ISGlobalToLocalMappingApplyIS(pcis->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_dir,&list[1]);CHKERRQ(ierr); 1358 ierr = ISConcatenate(PETSC_COMM_SELF,2,list,&is_B_reuse_mumps);CHKERRQ(ierr); 1359 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1360 } else { 1361 ierr = ISDuplicate(sub_schurs->is_Ej_all,&is_B_reuse_mumps);CHKERRQ(ierr); 1362 } 1363 ierr = ISGetLocalSize(is_B_reuse_mumps,&schur_size);CHKERRQ(ierr); 1364 ierr = VecScatterDestroy(&reuse_mumps->correction_scatter_B);CHKERRQ(ierr); 1365 ierr = VecScatterCreate(pcis->vec1_B,is_B_reuse_mumps,reuse_mumps->solB,NULL,&reuse_mumps->correction_scatter_B);CHKERRQ(ierr); 1366 ierr = ISLocalToGlobalMappingApplyIS(sub_schurs->BtoNmap,is_B_reuse_mumps,&tisB_N);CHKERRQ(ierr); 1367 list[0] = pcis->is_I_local; 1368 list[1] = tisB_N; 1369 ierr = ISConcatenate(PETSC_COMM_SELF,2,list,&tisR);CHKERRQ(ierr); 1370 ierr = ISDestroy(&tisB_N);CHKERRQ(ierr); 1371 ierr = ISGetLocalSize(tisR,&n_R);CHKERRQ(ierr); 1372 ierr = ISGetIndices(tisR,&idxs);CHKERRQ(ierr); 1373 ierr = PetscMemcpy(idx_R_local,idxs,n_R*sizeof(PetscInt));CHKERRQ(ierr); 1374 ierr = ISRestoreIndices(tisR,&idxs);CHKERRQ(ierr); 1375 ierr = ISDestroy(&tisR);CHKERRQ(ierr); 1376 } 1377 1378 /* Block code */ 1379 vbs = 1; 1380 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1381 if (bs>1 && !(n_vertices%bs)) { 1382 PetscBool is_blocked = PETSC_TRUE; 1383 PetscInt *vary; 1384 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1385 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1386 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1387 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1388 for (i=0; i<n_vertices/bs; i++) { 1389 if (vary[i]!=0 && vary[i]!=bs) { 1390 is_blocked = PETSC_FALSE; 1391 break; 1392 } 1393 } 1394 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1395 vbs = bs; 1396 for (i=0;i<n_R/vbs;i++) { 1397 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1398 } 1399 } 1400 ierr = PetscFree(vary);CHKERRQ(ierr); 1401 } 1402 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1403 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1404 1405 /* print some info if requested */ 1406 if (pcbddc->dbg_flag) { 1407 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1408 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1409 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1413 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1414 } 1415 1416 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1417 if (!sub_schurs->reuse_mumps) { 1418 IS is_aux1,is_aux2; 1419 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1420 1421 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1422 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1423 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1424 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1425 for (i=0; i<n_D; i++) { 1426 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1427 } 1428 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1429 for (i=0, j=0; i<n_R; i++) { 1430 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1431 aux_array1[j++] = i; 1432 } 1433 } 1434 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1435 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1436 for (i=0, j=0; i<n_B; i++) { 1437 if (!PetscBTLookup(bitmask,is_indices[i])) { 1438 aux_array2[j++] = i; 1439 } 1440 } 1441 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1442 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1443 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1444 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1445 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1446 1447 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1448 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1449 for (i=0, j=0; i<n_R; i++) { 1450 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1451 aux_array1[j++] = i; 1452 } 1453 } 1454 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1455 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1456 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1457 } 1458 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1459 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1460 } else { 1461 IS tis; 1462 PetscInt schur_size; 1463 1464 ierr = ISGetLocalSize(is_B_reuse_mumps,&schur_size);CHKERRQ(ierr); 1465 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1466 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,is_B_reuse_mumps,&pcbddc->R_to_B);CHKERRQ(ierr); 1467 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1468 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1469 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1470 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1471 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1472 } 1473 } 1474 ierr = ISDestroy(&is_B_reuse_mumps);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1481 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1482 { 1483 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1484 PC_IS *pcis = (PC_IS*)pc->data; 1485 PC pc_temp; 1486 Mat A_RR; 1487 MatReuse reuse; 1488 PetscScalar m_one = -1.0; 1489 PetscReal value; 1490 PetscInt n_D,n_R,ibs,mbs; 1491 PetscBool use_exact,use_exact_reduced,issbaij; 1492 PetscErrorCode ierr; 1493 /* prefixes stuff */ 1494 char dir_prefix[256],neu_prefix[256],str_level[16]; 1495 size_t len; 1496 1497 PetscFunctionBegin; 1498 1499 /* compute prefixes */ 1500 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1501 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1502 if (!pcbddc->current_level) { 1503 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1504 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1505 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1506 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1507 } else { 1508 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1509 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1510 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1511 len -= 15; /* remove "pc_bddc_coarse_" */ 1512 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1513 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1514 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1515 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1516 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1517 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1518 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1519 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1520 } 1521 1522 /* DIRICHLET PROBLEM */ 1523 if (dirichlet) { 1524 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1525 if (pcbddc->issym) { 1526 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1527 } 1528 /* Matrix for Dirichlet problem is pcis->A_II */ 1529 n_D = pcis->n - pcis->n_B; 1530 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1531 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1532 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1533 /* default */ 1534 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1535 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1536 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1537 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1538 if (issbaij) { 1539 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1540 } else { 1541 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1542 } 1543 /* Allow user's customization */ 1544 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1545 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1546 } 1547 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1548 if (sub_schurs->reuse_mumps) { 1549 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1550 1551 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1552 } 1553 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1554 if (!n_D) { 1555 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1556 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1557 } 1558 /* Set Up KSP for Dirichlet problem of BDDC */ 1559 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1560 /* set ksp_D into pcis data */ 1561 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1562 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1563 pcis->ksp_D = pcbddc->ksp_D; 1564 } 1565 1566 /* NEUMANN PROBLEM */ 1567 A_RR = 0; 1568 if (neumann) { 1569 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1570 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1571 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1572 if (pcbddc->ksp_R) { /* already created ksp */ 1573 PetscInt nn_R; 1574 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1575 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1576 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1577 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1578 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1580 reuse = MAT_INITIAL_MATRIX; 1581 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1582 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1583 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1584 reuse = MAT_INITIAL_MATRIX; 1585 } else { /* safe to reuse the matrix */ 1586 reuse = MAT_REUSE_MATRIX; 1587 } 1588 } 1589 /* last check */ 1590 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1591 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1592 reuse = MAT_INITIAL_MATRIX; 1593 } 1594 } else { /* first time, so we need to create the matrix */ 1595 reuse = MAT_INITIAL_MATRIX; 1596 } 1597 /* extract A_RR */ 1598 if (!sub_schurs->reuse_mumps) { 1599 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1600 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1601 if (ibs != mbs) { 1602 Mat newmat; 1603 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1604 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1605 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1606 } else { 1607 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1608 } 1609 if (pcbddc->issym) { 1610 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1611 } 1612 } else { 1613 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1614 1615 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1616 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1617 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1618 } 1619 if (!pcbddc->ksp_R) { /* create object if not present */ 1620 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1621 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1622 /* default */ 1623 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1624 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1625 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1626 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1627 if (issbaij) { 1628 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1629 } else { 1630 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1631 } 1632 /* Allow user's customization */ 1633 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1634 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1635 } 1636 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1637 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1638 if (!n_R) { 1639 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1640 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1641 } 1642 /* Reuse MUMPS solver if it is present */ 1643 if (sub_schurs->reuse_mumps) { 1644 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1645 1646 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1647 } 1648 /* Set Up KSP for Neumann problem of BDDC */ 1649 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1650 } 1651 /* free Neumann problem's matrix */ 1652 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1653 1654 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1655 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1656 if (pcbddc->dbg_flag) { 1657 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1658 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1660 } 1661 if (dirichlet) { /* Dirichlet */ 1662 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1663 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1664 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1665 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1666 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1667 /* need to be adapted? */ 1668 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1669 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1670 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1671 /* print info */ 1672 if (pcbddc->dbg_flag) { 1673 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1674 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1675 } 1676 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1677 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1678 } 1679 } 1680 if (neumann) { /* Neumann */ 1681 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1682 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1683 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1684 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1685 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1686 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1687 /* need to be adapted? */ 1688 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1689 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1690 /* print info */ 1691 if (pcbddc->dbg_flag) { 1692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1693 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1694 } 1695 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1696 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1697 } 1698 } 1699 } 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1705 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1706 { 1707 PetscErrorCode ierr; 1708 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1709 1710 PetscFunctionBegin; 1711 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1712 if (!pcbddc->switch_static) { 1713 if (applytranspose && pcbddc->local_auxmat1) { 1714 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1715 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1716 } 1717 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1718 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1719 } else { 1720 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1721 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1722 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1723 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1724 if (applytranspose && pcbddc->local_auxmat1) { 1725 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1726 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1727 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1728 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1729 } 1730 } 1731 if (applytranspose) { 1732 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1733 } else { 1734 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1735 } 1736 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1737 if (!pcbddc->switch_static) { 1738 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1739 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1740 if (!applytranspose && pcbddc->local_auxmat1) { 1741 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1742 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1743 } 1744 } else { 1745 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1746 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1747 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1748 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1749 if (!applytranspose && pcbddc->local_auxmat1) { 1750 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1751 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1752 } 1753 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1754 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1755 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1756 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1757 } 1758 PetscFunctionReturn(0); 1759 } 1760 1761 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1762 #undef __FUNCT__ 1763 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1764 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1765 { 1766 PetscErrorCode ierr; 1767 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1768 PC_IS* pcis = (PC_IS*) (pc->data); 1769 const PetscScalar zero = 0.0; 1770 1771 PetscFunctionBegin; 1772 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1773 if (applytranspose) { 1774 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1775 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1776 } else { 1777 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1778 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1779 } 1780 /* start communications from local primal nodes to rhs of coarse solver */ 1781 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1782 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1783 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1784 1785 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1786 /* TODO remove null space when doing multilevel */ 1787 if (pcbddc->coarse_ksp) { 1788 Vec rhs,sol; 1789 1790 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1791 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1792 if (applytranspose) { 1793 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1794 } else { 1795 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1796 } 1797 } 1798 1799 /* Local solution on R nodes */ 1800 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1801 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1802 } 1803 1804 /* communications from coarse sol to local primal nodes */ 1805 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1806 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1807 1808 /* Sum contributions from two levels */ 1809 if (applytranspose) { 1810 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1811 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1812 } else { 1813 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1814 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1815 } 1816 PetscFunctionReturn(0); 1817 } 1818 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1821 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1822 { 1823 PetscErrorCode ierr; 1824 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1825 PetscScalar *array; 1826 Vec from,to; 1827 1828 PetscFunctionBegin; 1829 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1830 from = pcbddc->coarse_vec; 1831 to = pcbddc->vec1_P; 1832 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1833 Vec tvec; 1834 1835 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1836 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1837 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1838 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1839 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1840 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1841 } 1842 } else { /* from local to global -> put data in coarse right hand side */ 1843 from = pcbddc->vec1_P; 1844 to = pcbddc->coarse_vec; 1845 } 1846 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1852 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1853 { 1854 PetscErrorCode ierr; 1855 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1856 PetscScalar *array; 1857 Vec from,to; 1858 1859 PetscFunctionBegin; 1860 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1861 from = pcbddc->coarse_vec; 1862 to = pcbddc->vec1_P; 1863 } else { /* from local to global -> put data in coarse right hand side */ 1864 from = pcbddc->vec1_P; 1865 to = pcbddc->coarse_vec; 1866 } 1867 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1868 if (smode == SCATTER_FORWARD) { 1869 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1870 Vec tvec; 1871 1872 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1873 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1874 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1875 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1876 } 1877 } else { 1878 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1879 ierr = VecResetArray(from);CHKERRQ(ierr); 1880 } 1881 } 1882 PetscFunctionReturn(0); 1883 } 1884 1885 /* uncomment for testing purposes */ 1886 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1889 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1890 { 1891 PetscErrorCode ierr; 1892 PC_IS* pcis = (PC_IS*)(pc->data); 1893 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1894 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1895 /* one and zero */ 1896 PetscScalar one=1.0,zero=0.0; 1897 /* space to store constraints and their local indices */ 1898 PetscScalar *constraints_data; 1899 PetscInt *constraints_idxs,*constraints_idxs_B; 1900 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1901 PetscInt *constraints_n; 1902 /* iterators */ 1903 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1904 /* BLAS integers */ 1905 PetscBLASInt lwork,lierr; 1906 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1907 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1908 /* reuse */ 1909 PetscInt olocal_primal_size,olocal_primal_size_cc; 1910 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1911 /* change of basis */ 1912 PetscBool qr_needed; 1913 PetscBT change_basis,qr_needed_idx; 1914 /* auxiliary stuff */ 1915 PetscInt *nnz,*is_indices; 1916 PetscInt ncc; 1917 /* some quantities */ 1918 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1919 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1920 1921 PetscFunctionBegin; 1922 /* Destroy Mat objects computed previously */ 1923 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1924 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1925 /* save info on constraints from previous setup (if any) */ 1926 olocal_primal_size = pcbddc->local_primal_size; 1927 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1928 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1929 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1930 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1931 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1932 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1933 1934 /* print some info */ 1935 if (pcbddc->dbg_flag) { 1936 IS vertices; 1937 PetscInt nv,nedges,nfaces; 1938 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1939 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1940 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1941 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1942 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1943 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1944 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1945 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1946 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1947 } 1948 1949 if (!pcbddc->adaptive_selection) { 1950 IS ISForVertices,*ISForFaces,*ISForEdges; 1951 MatNullSpace nearnullsp; 1952 const Vec *nearnullvecs; 1953 Vec *localnearnullsp; 1954 PetscScalar *array; 1955 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1956 PetscBool nnsp_has_cnst; 1957 /* LAPACK working arrays for SVD or POD */ 1958 PetscBool skip_lapack,boolforchange; 1959 PetscScalar *work; 1960 PetscReal *singular_vals; 1961 #if defined(PETSC_USE_COMPLEX) 1962 PetscReal *rwork; 1963 #endif 1964 #if defined(PETSC_MISSING_LAPACK_GESVD) 1965 PetscScalar *temp_basis,*correlation_mat; 1966 #else 1967 PetscBLASInt dummy_int=1; 1968 PetscScalar dummy_scalar=1.; 1969 #endif 1970 1971 /* Get index sets for faces, edges and vertices from graph */ 1972 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1973 /* free unneeded index sets */ 1974 if (!pcbddc->use_vertices) { 1975 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1976 } 1977 if (!pcbddc->use_edges) { 1978 for (i=0;i<n_ISForEdges;i++) { 1979 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1980 } 1981 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1982 n_ISForEdges = 0; 1983 } 1984 if (!pcbddc->use_faces) { 1985 for (i=0;i<n_ISForFaces;i++) { 1986 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1987 } 1988 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1989 n_ISForFaces = 0; 1990 } 1991 1992 #if defined(PETSC_USE_DEBUG) 1993 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 1994 Also use_change_of_basis should be consistent among processors */ 1995 if (pcbddc->NullSpace) { 1996 PetscBool tbool[2],gbool[2]; 1997 1998 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 1999 pcbddc->use_change_of_basis = PETSC_TRUE; 2000 if (!ISForEdges) { 2001 pcbddc->use_change_on_faces = PETSC_TRUE; 2002 } 2003 } 2004 tbool[0] = pcbddc->use_change_of_basis; 2005 tbool[1] = pcbddc->use_change_on_faces; 2006 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2007 pcbddc->use_change_of_basis = gbool[0]; 2008 pcbddc->use_change_on_faces = gbool[1]; 2009 } 2010 #endif 2011 2012 /* check if near null space is attached to global mat */ 2013 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2014 if (nearnullsp) { 2015 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2016 /* remove any stored info */ 2017 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2018 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2019 /* store information for BDDC solver reuse */ 2020 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2021 pcbddc->onearnullspace = nearnullsp; 2022 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2023 for (i=0;i<nnsp_size;i++) { 2024 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2025 } 2026 } else { /* if near null space is not provided BDDC uses constants by default */ 2027 nnsp_size = 0; 2028 nnsp_has_cnst = PETSC_TRUE; 2029 } 2030 /* get max number of constraints on a single cc */ 2031 max_constraints = nnsp_size; 2032 if (nnsp_has_cnst) max_constraints++; 2033 2034 /* 2035 Evaluate maximum storage size needed by the procedure 2036 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2037 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2038 There can be multiple constraints per connected component 2039 */ 2040 n_vertices = 0; 2041 if (ISForVertices) { 2042 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2043 } 2044 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2045 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2046 2047 total_counts = n_ISForFaces+n_ISForEdges; 2048 total_counts *= max_constraints; 2049 total_counts += n_vertices; 2050 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2051 2052 total_counts = 0; 2053 max_size_of_constraint = 0; 2054 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2055 IS used_is; 2056 if (i<n_ISForEdges) { 2057 used_is = ISForEdges[i]; 2058 } else { 2059 used_is = ISForFaces[i-n_ISForEdges]; 2060 } 2061 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2062 total_counts += j; 2063 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2064 } 2065 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2066 2067 /* get local part of global near null space vectors */ 2068 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2069 for (k=0;k<nnsp_size;k++) { 2070 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2071 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2073 } 2074 2075 /* whether or not to skip lapack calls */ 2076 skip_lapack = PETSC_TRUE; 2077 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2078 2079 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2080 if (!skip_lapack) { 2081 PetscScalar temp_work; 2082 2083 #if defined(PETSC_MISSING_LAPACK_GESVD) 2084 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2085 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2086 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2087 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2088 #if defined(PETSC_USE_COMPLEX) 2089 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2090 #endif 2091 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2092 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2093 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2094 lwork = -1; 2095 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2096 #if !defined(PETSC_USE_COMPLEX) 2097 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2098 #else 2099 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2100 #endif 2101 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2102 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2103 #else /* on missing GESVD */ 2104 /* SVD */ 2105 PetscInt max_n,min_n; 2106 max_n = max_size_of_constraint; 2107 min_n = max_constraints; 2108 if (max_size_of_constraint < max_constraints) { 2109 min_n = max_size_of_constraint; 2110 max_n = max_constraints; 2111 } 2112 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2113 #if defined(PETSC_USE_COMPLEX) 2114 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2115 #endif 2116 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2117 lwork = -1; 2118 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2119 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2120 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2121 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2122 #if !defined(PETSC_USE_COMPLEX) 2123 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2124 #else 2125 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2126 #endif 2127 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2128 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2129 #endif /* on missing GESVD */ 2130 /* Allocate optimal workspace */ 2131 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2132 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2133 } 2134 /* Now we can loop on constraining sets */ 2135 total_counts = 0; 2136 constraints_idxs_ptr[0] = 0; 2137 constraints_data_ptr[0] = 0; 2138 /* vertices */ 2139 if (n_vertices) { 2140 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2141 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2142 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2143 for (i=0;i<n_vertices;i++) { 2144 constraints_n[total_counts] = 1; 2145 constraints_data[total_counts] = 1.0; 2146 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2147 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2148 total_counts++; 2149 } 2150 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2151 PetscBool used_vertex; 2152 for (i=0;i<n_vertices;i++) { 2153 used_vertex = PETSC_FALSE; 2154 k = 0; 2155 while (!used_vertex && k<nnsp_size) { 2156 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2157 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2158 constraints_n[total_counts] = 1; 2159 constraints_idxs[total_counts] = is_indices[i]; 2160 constraints_data[total_counts] = 1.0; 2161 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2162 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2163 total_counts++; 2164 used_vertex = PETSC_TRUE; 2165 } 2166 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2167 k++; 2168 } 2169 } 2170 } 2171 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2172 n_vertices = total_counts; 2173 } 2174 2175 /* edges and faces */ 2176 total_counts_cc = total_counts; 2177 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2178 IS used_is; 2179 PetscBool idxs_copied = PETSC_FALSE; 2180 2181 if (ncc<n_ISForEdges) { 2182 used_is = ISForEdges[ncc]; 2183 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2184 } else { 2185 used_is = ISForFaces[ncc-n_ISForEdges]; 2186 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2187 } 2188 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2189 2190 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2191 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2192 /* change of basis should not be performed on local periodic nodes */ 2193 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2194 if (nnsp_has_cnst) { 2195 PetscScalar quad_value; 2196 2197 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2198 idxs_copied = PETSC_TRUE; 2199 2200 if (!pcbddc->use_nnsp_true) { 2201 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2202 } else { 2203 quad_value = 1.0; 2204 } 2205 for (j=0;j<size_of_constraint;j++) { 2206 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2207 } 2208 temp_constraints++; 2209 total_counts++; 2210 } 2211 for (k=0;k<nnsp_size;k++) { 2212 PetscReal real_value; 2213 PetscScalar *ptr_to_data; 2214 2215 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2216 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2217 for (j=0;j<size_of_constraint;j++) { 2218 ptr_to_data[j] = array[is_indices[j]]; 2219 } 2220 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2221 /* check if array is null on the connected component */ 2222 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2223 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2224 if (real_value > 0.0) { /* keep indices and values */ 2225 temp_constraints++; 2226 total_counts++; 2227 if (!idxs_copied) { 2228 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2229 idxs_copied = PETSC_TRUE; 2230 } 2231 } 2232 } 2233 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2234 valid_constraints = temp_constraints; 2235 if (!pcbddc->use_nnsp_true && temp_constraints) { 2236 if (temp_constraints == 1) { /* just normalize the constraint */ 2237 PetscScalar norm,*ptr_to_data; 2238 2239 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2240 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2241 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2242 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2243 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2244 } else { /* perform SVD */ 2245 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2246 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2247 2248 #if defined(PETSC_MISSING_LAPACK_GESVD) 2249 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2250 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2251 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2252 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2253 from that computed using LAPACKgesvd 2254 -> This is due to a different computation of eigenvectors in LAPACKheev 2255 -> The quality of the POD-computed basis will be the same */ 2256 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2257 /* Store upper triangular part of correlation matrix */ 2258 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2259 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2260 for (j=0;j<temp_constraints;j++) { 2261 for (k=0;k<j+1;k++) { 2262 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2263 } 2264 } 2265 /* compute eigenvalues and eigenvectors of correlation matrix */ 2266 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2267 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2268 #if !defined(PETSC_USE_COMPLEX) 2269 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2270 #else 2271 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2272 #endif 2273 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2274 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2275 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2276 j = 0; 2277 while (j < temp_constraints && singular_vals[j] < tol) j++; 2278 total_counts = total_counts-j; 2279 valid_constraints = temp_constraints-j; 2280 /* scale and copy POD basis into used quadrature memory */ 2281 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2282 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2283 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2284 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2285 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2286 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2287 if (j<temp_constraints) { 2288 PetscInt ii; 2289 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2290 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2291 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2292 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2293 for (k=0;k<temp_constraints-j;k++) { 2294 for (ii=0;ii<size_of_constraint;ii++) { 2295 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2296 } 2297 } 2298 } 2299 #else /* on missing GESVD */ 2300 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2301 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2302 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2303 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2304 #if !defined(PETSC_USE_COMPLEX) 2305 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2306 #else 2307 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2308 #endif 2309 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2310 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2311 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2312 k = temp_constraints; 2313 if (k > size_of_constraint) k = size_of_constraint; 2314 j = 0; 2315 while (j < k && singular_vals[k-j-1] < tol) j++; 2316 valid_constraints = k-j; 2317 total_counts = total_counts-temp_constraints+valid_constraints; 2318 #endif /* on missing GESVD */ 2319 } 2320 } 2321 /* update pointers information */ 2322 if (valid_constraints) { 2323 constraints_n[total_counts_cc] = valid_constraints; 2324 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2325 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2326 /* set change_of_basis flag */ 2327 if (boolforchange) { 2328 PetscBTSet(change_basis,total_counts_cc); 2329 } 2330 total_counts_cc++; 2331 } 2332 } 2333 /* free workspace */ 2334 if (!skip_lapack) { 2335 ierr = PetscFree(work);CHKERRQ(ierr); 2336 #if defined(PETSC_USE_COMPLEX) 2337 ierr = PetscFree(rwork);CHKERRQ(ierr); 2338 #endif 2339 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2340 #if defined(PETSC_MISSING_LAPACK_GESVD) 2341 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2342 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2343 #endif 2344 } 2345 for (k=0;k<nnsp_size;k++) { 2346 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2347 } 2348 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2349 /* free index sets of faces, edges and vertices */ 2350 for (i=0;i<n_ISForFaces;i++) { 2351 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2352 } 2353 if (n_ISForFaces) { 2354 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2355 } 2356 for (i=0;i<n_ISForEdges;i++) { 2357 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2358 } 2359 if (n_ISForEdges) { 2360 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2361 } 2362 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2363 } else { 2364 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2365 2366 total_counts = 0; 2367 n_vertices = 0; 2368 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2369 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2370 } 2371 max_constraints = 0; 2372 total_counts_cc = 0; 2373 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2374 total_counts += pcbddc->adaptive_constraints_n[i]; 2375 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2376 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2377 } 2378 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2379 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2380 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2381 constraints_data = pcbddc->adaptive_constraints_data; 2382 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2383 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2384 total_counts_cc = 0; 2385 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2386 if (pcbddc->adaptive_constraints_n[i]) { 2387 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2388 } 2389 } 2390 #if 0 2391 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2392 for (i=0;i<total_counts_cc;i++) { 2393 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2394 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2395 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2396 printf(" %d",constraints_idxs[j]); 2397 } 2398 printf("\n"); 2399 printf("number of cc: %d\n",constraints_n[i]); 2400 } 2401 for (i=0;i<n_vertices;i++) { 2402 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2403 } 2404 for (i=0;i<sub_schurs->n_subs;i++) { 2405 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2406 } 2407 #endif 2408 2409 max_size_of_constraint = 0; 2410 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2411 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2412 /* Change of basis */ 2413 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2414 if (pcbddc->use_change_of_basis) { 2415 for (i=0;i<sub_schurs->n_subs;i++) { 2416 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2417 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2418 } 2419 } 2420 } 2421 } 2422 pcbddc->local_primal_size = total_counts; 2423 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2424 2425 /* map constraints_idxs in boundary numbering */ 2426 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2427 if (i != constraints_idxs_ptr[total_counts_cc]) { 2428 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2429 } 2430 2431 /* Create constraint matrix */ 2432 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2433 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2434 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2435 2436 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2437 /* determine if a QR strategy is needed for change of basis */ 2438 qr_needed = PETSC_FALSE; 2439 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2440 total_primal_vertices=0; 2441 pcbddc->local_primal_size_cc = 0; 2442 for (i=0;i<total_counts_cc;i++) { 2443 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2444 if (size_of_constraint == 1) { 2445 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2446 pcbddc->local_primal_size_cc += 1; 2447 } else if (PetscBTLookup(change_basis,i)) { 2448 for (k=0;k<constraints_n[i];k++) { 2449 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2450 } 2451 pcbddc->local_primal_size_cc += constraints_n[i]; 2452 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2453 PetscBTSet(qr_needed_idx,i); 2454 qr_needed = PETSC_TRUE; 2455 } 2456 } else { 2457 pcbddc->local_primal_size_cc += 1; 2458 } 2459 } 2460 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2461 pcbddc->n_vertices = total_primal_vertices; 2462 /* permute indices in order to have a sorted set of vertices */ 2463 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2464 2465 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2466 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2467 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2468 2469 /* nonzero structure of constraint matrix */ 2470 /* and get reference dof for local constraints */ 2471 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2472 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2473 2474 j = total_primal_vertices; 2475 total_counts = total_primal_vertices; 2476 cum = total_primal_vertices; 2477 for (i=n_vertices;i<total_counts_cc;i++) { 2478 if (!PetscBTLookup(change_basis,i)) { 2479 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2480 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2481 cum++; 2482 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2483 for (k=0;k<constraints_n[i];k++) { 2484 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2485 nnz[j+k] = size_of_constraint; 2486 } 2487 j += constraints_n[i]; 2488 } 2489 } 2490 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2491 ierr = PetscFree(nnz);CHKERRQ(ierr); 2492 2493 /* set values in constraint matrix */ 2494 for (i=0;i<total_primal_vertices;i++) { 2495 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2496 } 2497 total_counts = total_primal_vertices; 2498 for (i=n_vertices;i<total_counts_cc;i++) { 2499 if (!PetscBTLookup(change_basis,i)) { 2500 PetscInt *cols; 2501 2502 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2503 cols = constraints_idxs+constraints_idxs_ptr[i]; 2504 for (k=0;k<constraints_n[i];k++) { 2505 PetscInt row = total_counts+k; 2506 PetscScalar *vals; 2507 2508 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2509 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2510 } 2511 total_counts += constraints_n[i]; 2512 } 2513 } 2514 /* assembling */ 2515 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2516 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2517 2518 /* 2519 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2520 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2521 */ 2522 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2523 if (pcbddc->use_change_of_basis) { 2524 /* dual and primal dofs on a single cc */ 2525 PetscInt dual_dofs,primal_dofs; 2526 /* working stuff for GEQRF */ 2527 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2528 PetscBLASInt lqr_work; 2529 /* working stuff for UNGQR */ 2530 PetscScalar *gqr_work,lgqr_work_t; 2531 PetscBLASInt lgqr_work; 2532 /* working stuff for TRTRS */ 2533 PetscScalar *trs_rhs; 2534 PetscBLASInt Blas_NRHS; 2535 /* pointers for values insertion into change of basis matrix */ 2536 PetscInt *start_rows,*start_cols; 2537 PetscScalar *start_vals; 2538 /* working stuff for values insertion */ 2539 PetscBT is_primal; 2540 PetscInt *aux_primal_numbering_B; 2541 /* matrix sizes */ 2542 PetscInt global_size,local_size; 2543 /* temporary change of basis */ 2544 Mat localChangeOfBasisMatrix; 2545 /* extra space for debugging */ 2546 PetscScalar *dbg_work; 2547 2548 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2549 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2550 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2551 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2552 /* nonzeros for local mat */ 2553 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2554 for (i=0;i<pcis->n;i++) nnz[i]=1; 2555 for (i=n_vertices;i<total_counts_cc;i++) { 2556 if (PetscBTLookup(change_basis,i)) { 2557 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2558 if (PetscBTLookup(qr_needed_idx,i)) { 2559 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2560 } else { 2561 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2562 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2563 } 2564 } 2565 } 2566 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2567 ierr = PetscFree(nnz);CHKERRQ(ierr); 2568 /* Set initial identity in the matrix */ 2569 for (i=0;i<pcis->n;i++) { 2570 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2571 } 2572 2573 if (pcbddc->dbg_flag) { 2574 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2575 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2576 } 2577 2578 2579 /* Now we loop on the constraints which need a change of basis */ 2580 /* 2581 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2582 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2583 2584 Basic blocks of change of basis matrix T computed by 2585 2586 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2587 2588 | 1 0 ... 0 s_1/S | 2589 | 0 1 ... 0 s_2/S | 2590 | ... | 2591 | 0 ... 1 s_{n-1}/S | 2592 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2593 2594 with S = \sum_{i=1}^n s_i^2 2595 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2596 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2597 2598 - QR decomposition of constraints otherwise 2599 */ 2600 if (qr_needed) { 2601 /* space to store Q */ 2602 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2603 /* first we issue queries for optimal work */ 2604 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2605 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2606 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2607 lqr_work = -1; 2608 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2609 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2610 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2611 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2612 lgqr_work = -1; 2613 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2614 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2615 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2616 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2617 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2618 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2619 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2620 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2621 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2622 /* array to store scaling factors for reflectors */ 2623 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2624 /* array to store rhs and solution of triangular solver */ 2625 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2626 /* allocating workspace for check */ 2627 if (pcbddc->dbg_flag) { 2628 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2629 } 2630 } 2631 /* array to store whether a node is primal or not */ 2632 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2633 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2634 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2635 if (i != total_primal_vertices) { 2636 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2637 } 2638 for (i=0;i<total_primal_vertices;i++) { 2639 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2640 } 2641 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2642 2643 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2644 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2645 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2646 if (PetscBTLookup(change_basis,total_counts)) { 2647 /* get constraint info */ 2648 primal_dofs = constraints_n[total_counts]; 2649 dual_dofs = size_of_constraint-primal_dofs; 2650 2651 if (pcbddc->dbg_flag) { 2652 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2653 } 2654 2655 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2656 2657 /* copy quadrature constraints for change of basis check */ 2658 if (pcbddc->dbg_flag) { 2659 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2660 } 2661 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2662 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2663 2664 /* compute QR decomposition of constraints */ 2665 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2666 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2667 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2668 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2669 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2670 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2671 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2672 2673 /* explictly compute R^-T */ 2674 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2675 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2676 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2677 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2678 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2679 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2680 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2681 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2682 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2683 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2684 2685 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2686 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2687 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2688 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2689 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2690 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2691 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2692 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2693 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2694 2695 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2696 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2697 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2698 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2699 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2700 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2701 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2702 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2703 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2704 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2705 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2706 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2707 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2708 2709 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2710 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2711 /* insert cols for primal dofs */ 2712 for (j=0;j<primal_dofs;j++) { 2713 start_vals = &qr_basis[j*size_of_constraint]; 2714 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2715 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2716 } 2717 /* insert cols for dual dofs */ 2718 for (j=0,k=0;j<dual_dofs;k++) { 2719 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2720 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2721 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2722 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2723 j++; 2724 } 2725 } 2726 2727 /* check change of basis */ 2728 if (pcbddc->dbg_flag) { 2729 PetscInt ii,jj; 2730 PetscBool valid_qr=PETSC_TRUE; 2731 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2732 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2733 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2734 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2735 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2736 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2737 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2738 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2739 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2740 for (jj=0;jj<size_of_constraint;jj++) { 2741 for (ii=0;ii<primal_dofs;ii++) { 2742 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2743 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2744 } 2745 } 2746 if (!valid_qr) { 2747 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2748 for (jj=0;jj<size_of_constraint;jj++) { 2749 for (ii=0;ii<primal_dofs;ii++) { 2750 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2751 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2752 } 2753 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2754 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2755 } 2756 } 2757 } 2758 } else { 2759 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2760 } 2761 } 2762 } else { /* simple transformation block */ 2763 PetscInt row,col; 2764 PetscScalar val,norm; 2765 2766 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2767 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2768 for (j=0;j<size_of_constraint;j++) { 2769 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2770 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2771 if (!PetscBTLookup(is_primal,row_B)) { 2772 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2773 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2774 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2775 } else { 2776 for (k=0;k<size_of_constraint;k++) { 2777 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2778 if (row != col) { 2779 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2780 } else { 2781 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2782 } 2783 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2784 } 2785 } 2786 } 2787 if (pcbddc->dbg_flag) { 2788 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2789 } 2790 } 2791 } else { 2792 if (pcbddc->dbg_flag) { 2793 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2794 } 2795 } 2796 } 2797 2798 /* free workspace */ 2799 if (qr_needed) { 2800 if (pcbddc->dbg_flag) { 2801 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2802 } 2803 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2804 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2805 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2806 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2807 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2808 } 2809 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2810 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2811 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2812 2813 /* assembling of global change of variable */ 2814 { 2815 Mat tmat; 2816 PetscInt bs; 2817 2818 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2819 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2820 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2821 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2822 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2823 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2824 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2825 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2826 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2827 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2828 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2829 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2830 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2831 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2832 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2833 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2834 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2835 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2836 } 2837 /* check */ 2838 if (pcbddc->dbg_flag) { 2839 PetscReal error; 2840 Vec x,x_change; 2841 2842 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2843 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2844 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2845 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2846 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2847 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2848 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2849 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2850 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2851 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2852 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2853 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2854 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2855 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2856 ierr = VecDestroy(&x);CHKERRQ(ierr); 2857 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2858 } 2859 2860 /* adapt sub_schurs computed (if any) */ 2861 if (pcbddc->use_deluxe_scaling) { 2862 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2863 if (sub_schurs->S_Ej_all) { 2864 Mat S_new,tmat; 2865 IS is_all_N; 2866 2867 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2868 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2869 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2870 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2871 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2872 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2873 sub_schurs->S_Ej_all = S_new; 2874 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2875 if (sub_schurs->sum_S_Ej_all) { 2876 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2877 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2878 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2879 sub_schurs->sum_S_Ej_all = S_new; 2880 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2881 } 2882 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2883 } 2884 } 2885 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2886 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2887 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2888 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2889 } 2890 2891 /* set up change of basis context */ 2892 if (pcbddc->ChangeOfBasisMatrix) { 2893 PCBDDCChange_ctx change_ctx; 2894 2895 if (!pcbddc->new_global_mat) { 2896 PetscInt global_size,local_size; 2897 2898 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2899 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2900 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2901 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2902 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2903 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2904 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2905 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2906 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2907 } else { 2908 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2909 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2910 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2911 } 2912 if (!pcbddc->user_ChangeOfBasisMatrix) { 2913 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2914 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2915 } else { 2916 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2917 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2918 } 2919 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2920 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2921 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2922 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2923 } 2924 2925 /* check if a new primal space has been introduced */ 2926 pcbddc->new_primal_space_local = PETSC_TRUE; 2927 if (olocal_primal_size == pcbddc->local_primal_size) { 2928 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2929 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2930 if (!pcbddc->new_primal_space_local) { 2931 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2932 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2933 } 2934 } 2935 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2936 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2937 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2938 2939 /* flush dbg viewer */ 2940 if (pcbddc->dbg_flag) { 2941 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2942 } 2943 2944 /* free workspace */ 2945 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2946 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2947 if (!pcbddc->adaptive_selection) { 2948 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2949 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2950 } else { 2951 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2952 pcbddc->adaptive_constraints_idxs_ptr, 2953 pcbddc->adaptive_constraints_data_ptr, 2954 pcbddc->adaptive_constraints_idxs, 2955 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2956 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2957 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2958 } 2959 PetscFunctionReturn(0); 2960 } 2961 2962 #undef __FUNCT__ 2963 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2964 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2965 { 2966 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2967 PC_IS *pcis = (PC_IS*)pc->data; 2968 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2969 PetscInt ierr,i,vertex_size,N; 2970 PetscViewer viewer=pcbddc->dbg_viewer; 2971 2972 PetscFunctionBegin; 2973 /* Reset previously computed graph */ 2974 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2975 /* Init local Graph struct */ 2976 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 2977 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 2978 2979 /* Check validity of the csr graph passed in by the user */ 2980 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2981 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2982 } 2983 2984 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2985 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2986 PetscInt *xadj,*adjncy; 2987 PetscInt nvtxs; 2988 PetscBool flg_row=PETSC_FALSE; 2989 2990 if (pcbddc->use_local_adj) { 2991 2992 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2993 if (flg_row) { 2994 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2995 pcbddc->computed_rowadj = PETSC_TRUE; 2996 } 2997 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2998 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 2999 IS is_dummy; 3000 ISLocalToGlobalMapping l2gmap_dummy; 3001 PetscInt j,sum; 3002 PetscInt *cxadj,*cadjncy; 3003 const PetscInt *idxs; 3004 PCBDDCGraph graph; 3005 PetscBT is_on_boundary; 3006 3007 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3008 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3009 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3010 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3011 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3012 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3013 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3014 if (flg_row) { 3015 graph->xadj = xadj; 3016 graph->adjncy = adjncy; 3017 } 3018 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3019 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3020 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3021 3022 if (pcbddc->dbg_flag) { 3023 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3024 for (i=0;i<graph->ncc;i++) { 3025 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3026 } 3027 } 3028 3029 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3030 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3031 for (i=0;i<pcis->n_B;i++) { 3032 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3033 } 3034 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3035 3036 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3037 sum = 0; 3038 for (i=0;i<graph->ncc;i++) { 3039 PetscInt sizecc = 0; 3040 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3041 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3042 sizecc++; 3043 } 3044 } 3045 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3046 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3047 cxadj[graph->queue[j]] = sizecc; 3048 } 3049 } 3050 sum += sizecc*sizecc; 3051 } 3052 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3053 sum = 0; 3054 for (i=0;i<pcis->n;i++) { 3055 PetscInt temp = cxadj[i]; 3056 cxadj[i] = sum; 3057 sum += temp; 3058 } 3059 cxadj[pcis->n] = sum; 3060 for (i=0;i<graph->ncc;i++) { 3061 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3062 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3063 PetscInt k,sizecc = 0; 3064 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3065 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3066 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3067 sizecc++; 3068 } 3069 } 3070 } 3071 } 3072 } 3073 if (sum) { 3074 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3075 } else { 3076 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3077 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3078 } 3079 graph->xadj = 0; 3080 graph->adjncy = 0; 3081 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3082 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3083 } 3084 } 3085 if (pcbddc->dbg_flag) { 3086 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3087 } 3088 3089 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3090 vertex_size = 1; 3091 if (pcbddc->user_provided_isfordofs) { 3092 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3093 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3094 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3095 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3096 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3097 } 3098 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3099 pcbddc->n_ISForDofs = 0; 3100 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3101 } 3102 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3103 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3104 } else { 3105 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3106 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3107 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3108 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3109 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3110 } 3111 } 3112 } 3113 3114 /* Setup of Graph */ 3115 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3116 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3117 } 3118 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3119 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3120 } 3121 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3122 3123 /* Graph's connected components analysis */ 3124 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3125 3126 /* print some info to stdout */ 3127 if (pcbddc->dbg_flag) { 3128 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3129 } 3130 3131 /* mark topography has done */ 3132 pcbddc->recompute_topography = PETSC_FALSE; 3133 PetscFunctionReturn(0); 3134 } 3135 3136 #undef __FUNCT__ 3137 #define __FUNCT__ "PCBDDCSubsetNumbering" 3138 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3139 { 3140 Vec local_vec,global_vec; 3141 IS seqis,paris; 3142 VecScatter scatter_ctx; 3143 PetscScalar *array; 3144 PetscInt *temp_global_dofs; 3145 PetscScalar globalsum; 3146 PetscInt i,j,s; 3147 PetscInt nlocals,first_index,old_index,max_local,max_global; 3148 PetscMPIInt rank_prec_comm,size_prec_comm; 3149 PetscInt *dof_sizes,*dof_displs; 3150 PetscBool first_found; 3151 PetscErrorCode ierr; 3152 3153 PetscFunctionBegin; 3154 /* mpi buffers */ 3155 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3156 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3157 j = ( !rank_prec_comm ? size_prec_comm : 0); 3158 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3159 /* get maximum size of subset */ 3160 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3161 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3162 max_local = 0; 3163 for (i=0;i<n_local_dofs;i++) { 3164 if (max_local < temp_global_dofs[i] ) { 3165 max_local = temp_global_dofs[i]; 3166 } 3167 } 3168 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3169 max_global++; 3170 max_local = 0; 3171 for (i=0;i<n_local_dofs;i++) { 3172 if (max_local < local_dofs[i] ) { 3173 max_local = local_dofs[i]; 3174 } 3175 } 3176 max_local++; 3177 /* allocate workspace */ 3178 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3179 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3180 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3181 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3182 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3183 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3184 /* create scatter */ 3185 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3186 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3187 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3188 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3189 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3190 /* init array */ 3191 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3192 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3193 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3194 if (local_dofs_mult) { 3195 for (i=0;i<n_local_dofs;i++) { 3196 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3197 } 3198 } else { 3199 for (i=0;i<n_local_dofs;i++) { 3200 array[local_dofs[i]]=1.0; 3201 } 3202 } 3203 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3204 /* scatter into global vec and get total number of global dofs */ 3205 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3206 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3207 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3208 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3209 /* Fill global_vec with cumulative function for global numbering */ 3210 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3211 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3212 nlocals = 0; 3213 first_index = -1; 3214 first_found = PETSC_FALSE; 3215 for (i=0;i<s;i++) { 3216 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3217 first_found = PETSC_TRUE; 3218 first_index = i; 3219 } 3220 nlocals += (PetscInt)PetscRealPart(array[i]); 3221 } 3222 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3223 if (!rank_prec_comm) { 3224 dof_displs[0]=0; 3225 for (i=1;i<size_prec_comm;i++) { 3226 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3227 } 3228 } 3229 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3230 if (first_found) { 3231 array[first_index] += (PetscScalar)nlocals; 3232 old_index = first_index; 3233 for (i=first_index+1;i<s;i++) { 3234 if (PetscRealPart(array[i]) > 0.1) { 3235 array[i] += array[old_index]; 3236 old_index = i; 3237 } 3238 } 3239 } 3240 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3241 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3242 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3243 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3244 /* get global ordering of local dofs */ 3245 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3246 if (local_dofs_mult) { 3247 for (i=0;i<n_local_dofs;i++) { 3248 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3249 } 3250 } else { 3251 for (i=0;i<n_local_dofs;i++) { 3252 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3253 } 3254 } 3255 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3256 /* free workspace */ 3257 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3258 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3259 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3260 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3261 /* return pointer to global ordering of local dofs */ 3262 *global_numbering_subset = temp_global_dofs; 3263 PetscFunctionReturn(0); 3264 } 3265 3266 #undef __FUNCT__ 3267 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3268 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3269 { 3270 PetscInt i,j; 3271 PetscScalar *alphas; 3272 PetscErrorCode ierr; 3273 3274 PetscFunctionBegin; 3275 /* this implements stabilized Gram-Schmidt */ 3276 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3277 for (i=0;i<n;i++) { 3278 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3279 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3280 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3281 } 3282 ierr = PetscFree(alphas);CHKERRQ(ierr); 3283 PetscFunctionReturn(0); 3284 } 3285 3286 #undef __FUNCT__ 3287 #define __FUNCT__ "MatISGetSubassemblingPattern" 3288 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3289 { 3290 Mat subdomain_adj; 3291 IS new_ranks,ranks_send_to; 3292 MatPartitioning partitioner; 3293 Mat_IS *matis; 3294 PetscInt n_neighs,*neighs,*n_shared,**shared; 3295 PetscInt prank; 3296 PetscMPIInt size,rank,color; 3297 PetscInt *xadj,*adjncy,*oldranks; 3298 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3299 PetscInt i,local_size,threshold=0; 3300 PetscErrorCode ierr; 3301 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3302 PetscSubcomm subcomm; 3303 3304 PetscFunctionBegin; 3305 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3306 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3307 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3308 3309 /* Get info on mapping */ 3310 matis = (Mat_IS*)(mat->data); 3311 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3312 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3313 3314 /* build local CSR graph of subdomains' connectivity */ 3315 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3316 xadj[0] = 0; 3317 xadj[1] = PetscMax(n_neighs-1,0); 3318 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3319 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3320 3321 if (threshold) { 3322 PetscInt xadj_count = 0; 3323 for (i=1;i<n_neighs;i++) { 3324 if (n_shared[i] > threshold) { 3325 adjncy[xadj_count] = neighs[i]; 3326 adjncy_wgt[xadj_count] = n_shared[i]; 3327 xadj_count++; 3328 } 3329 } 3330 xadj[1] = xadj_count; 3331 } else { 3332 if (xadj[1]) { 3333 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3334 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3335 } 3336 } 3337 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3338 if (use_square) { 3339 for (i=0;i<xadj[1];i++) { 3340 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3341 } 3342 } 3343 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3344 3345 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3346 3347 /* 3348 Restrict work on active processes only. 3349 */ 3350 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3351 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3352 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3353 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3354 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3355 if (color) { 3356 ierr = PetscFree(xadj);CHKERRQ(ierr); 3357 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3358 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3359 } else { 3360 PetscInt coarsening_ratio; 3361 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3362 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3363 prank = rank; 3364 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3365 /* 3366 for (i=0;i<size;i++) { 3367 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3368 } 3369 */ 3370 for (i=0;i<xadj[1];i++) { 3371 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3372 } 3373 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3374 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3375 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3376 3377 /* Partition */ 3378 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3379 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3380 if (use_vwgt) { 3381 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3382 v_wgt[0] = local_size; 3383 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3384 } 3385 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3386 coarsening_ratio = size/n_subdomains; 3387 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3388 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3389 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3390 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3391 3392 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3393 if (contiguous) { 3394 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3395 } else { 3396 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3397 } 3398 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3399 /* clean up */ 3400 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3401 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3402 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3403 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3404 } 3405 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3406 3407 /* assemble parallel IS for sends */ 3408 i = 1; 3409 if (color) i=0; 3410 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3411 3412 /* get back IS */ 3413 *is_sends = ranks_send_to; 3414 PetscFunctionReturn(0); 3415 } 3416 3417 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3418 3419 #undef __FUNCT__ 3420 #define __FUNCT__ "MatISSubassemble" 3421 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3422 { 3423 Mat local_mat; 3424 Mat_IS *matis; 3425 IS is_sends_internal; 3426 PetscInt rows,cols,new_local_rows; 3427 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3428 PetscBool ismatis,isdense,newisdense,destroy_mat; 3429 ISLocalToGlobalMapping l2gmap; 3430 PetscInt* l2gmap_indices; 3431 const PetscInt* is_indices; 3432 MatType new_local_type; 3433 /* buffers */ 3434 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3435 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3436 PetscInt *recv_buffer_idxs_local; 3437 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3438 /* MPI */ 3439 MPI_Comm comm,comm_n; 3440 PetscSubcomm subcomm; 3441 PetscMPIInt n_sends,n_recvs,commsize; 3442 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3443 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3444 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3445 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3446 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3447 PetscErrorCode ierr; 3448 3449 PetscFunctionBegin; 3450 /* TODO: add missing checks */ 3451 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3452 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3453 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3454 PetscValidLogicalCollectiveInt(mat,nis,7); 3455 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3456 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3457 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3458 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3459 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3460 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3461 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3462 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3463 PetscInt mrows,mcols,mnrows,mncols; 3464 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3465 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3466 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3467 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3468 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3469 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3470 } 3471 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3472 PetscValidLogicalCollectiveInt(mat,bs,0); 3473 /* prepare IS for sending if not provided */ 3474 if (!is_sends) { 3475 PetscBool pcontig = PETSC_TRUE; 3476 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3477 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3478 } else { 3479 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3480 is_sends_internal = is_sends; 3481 } 3482 3483 /* get pointer of MATIS data */ 3484 matis = (Mat_IS*)mat->data; 3485 3486 /* get comm */ 3487 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3488 3489 /* compute number of sends */ 3490 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3491 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3492 3493 /* compute number of receives */ 3494 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3495 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3496 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3497 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3498 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3499 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3500 ierr = PetscFree(iflags);CHKERRQ(ierr); 3501 3502 /* restrict comm if requested */ 3503 subcomm = 0; 3504 destroy_mat = PETSC_FALSE; 3505 if (restrict_comm) { 3506 PetscMPIInt color,subcommsize; 3507 3508 color = 0; 3509 if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */ 3510 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3511 subcommsize = commsize - subcommsize; 3512 /* check if reuse has been requested */ 3513 if (reuse == MAT_REUSE_MATRIX) { 3514 if (*mat_n) { 3515 PetscMPIInt subcommsize2; 3516 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3517 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3518 comm_n = PetscObjectComm((PetscObject)*mat_n); 3519 } else { 3520 comm_n = PETSC_COMM_SELF; 3521 } 3522 } else { /* MAT_INITIAL_MATRIX */ 3523 PetscMPIInt rank; 3524 3525 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3526 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3527 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3528 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3529 comm_n = PetscSubcommChild(subcomm); 3530 } 3531 /* flag to destroy *mat_n if not significative */ 3532 if (color) destroy_mat = PETSC_TRUE; 3533 } else { 3534 comm_n = comm; 3535 } 3536 3537 /* prepare send/receive buffers */ 3538 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3539 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3540 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3541 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3542 if (nis) { 3543 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3544 } 3545 3546 /* Get data from local matrices */ 3547 if (!isdense) { 3548 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3549 /* TODO: See below some guidelines on how to prepare the local buffers */ 3550 /* 3551 send_buffer_vals should contain the raw values of the local matrix 3552 send_buffer_idxs should contain: 3553 - MatType_PRIVATE type 3554 - PetscInt size_of_l2gmap 3555 - PetscInt global_row_indices[size_of_l2gmap] 3556 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3557 */ 3558 } else { 3559 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3560 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3561 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3562 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3563 send_buffer_idxs[1] = i; 3564 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3565 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3566 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3567 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3568 for (i=0;i<n_sends;i++) { 3569 ilengths_vals[is_indices[i]] = len*len; 3570 ilengths_idxs[is_indices[i]] = len+2; 3571 } 3572 } 3573 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3574 /* additional is (if any) */ 3575 if (nis) { 3576 PetscMPIInt psum; 3577 PetscInt j; 3578 for (j=0,psum=0;j<nis;j++) { 3579 PetscInt plen; 3580 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3581 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3582 psum += len+1; /* indices + lenght */ 3583 } 3584 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3585 for (j=0,psum=0;j<nis;j++) { 3586 PetscInt plen; 3587 const PetscInt *is_array_idxs; 3588 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3589 send_buffer_idxs_is[psum] = plen; 3590 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3591 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3592 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3593 psum += plen+1; /* indices + lenght */ 3594 } 3595 for (i=0;i<n_sends;i++) { 3596 ilengths_idxs_is[is_indices[i]] = psum; 3597 } 3598 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3599 } 3600 3601 buf_size_idxs = 0; 3602 buf_size_vals = 0; 3603 buf_size_idxs_is = 0; 3604 for (i=0;i<n_recvs;i++) { 3605 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3606 buf_size_vals += (PetscInt)olengths_vals[i]; 3607 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3608 } 3609 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3610 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3611 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3612 3613 /* get new tags for clean communications */ 3614 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3615 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3616 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3617 3618 /* allocate for requests */ 3619 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3620 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3621 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3622 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3623 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3624 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3625 3626 /* communications */ 3627 ptr_idxs = recv_buffer_idxs; 3628 ptr_vals = recv_buffer_vals; 3629 ptr_idxs_is = recv_buffer_idxs_is; 3630 for (i=0;i<n_recvs;i++) { 3631 source_dest = onodes[i]; 3632 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3633 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3634 ptr_idxs += olengths_idxs[i]; 3635 ptr_vals += olengths_vals[i]; 3636 if (nis) { 3637 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3638 ptr_idxs_is += olengths_idxs_is[i]; 3639 } 3640 } 3641 for (i=0;i<n_sends;i++) { 3642 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3643 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3644 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3645 if (nis) { 3646 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3647 } 3648 } 3649 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3650 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3651 3652 /* assemble new l2g map */ 3653 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3654 ptr_idxs = recv_buffer_idxs; 3655 new_local_rows = 0; 3656 for (i=0;i<n_recvs;i++) { 3657 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3658 ptr_idxs += olengths_idxs[i]; 3659 } 3660 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3661 ptr_idxs = recv_buffer_idxs; 3662 new_local_rows = 0; 3663 for (i=0;i<n_recvs;i++) { 3664 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3665 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3666 ptr_idxs += olengths_idxs[i]; 3667 } 3668 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3669 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3670 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3671 3672 /* infer new local matrix type from received local matrices type */ 3673 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3674 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3675 if (n_recvs) { 3676 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3677 ptr_idxs = recv_buffer_idxs; 3678 for (i=0;i<n_recvs;i++) { 3679 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3680 new_local_type_private = MATAIJ_PRIVATE; 3681 break; 3682 } 3683 ptr_idxs += olengths_idxs[i]; 3684 } 3685 switch (new_local_type_private) { 3686 case MATDENSE_PRIVATE: 3687 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3688 new_local_type = MATSEQAIJ; 3689 bs = 1; 3690 } else { /* if I receive only 1 dense matrix */ 3691 new_local_type = MATSEQDENSE; 3692 bs = 1; 3693 } 3694 break; 3695 case MATAIJ_PRIVATE: 3696 new_local_type = MATSEQAIJ; 3697 bs = 1; 3698 break; 3699 case MATBAIJ_PRIVATE: 3700 new_local_type = MATSEQBAIJ; 3701 break; 3702 case MATSBAIJ_PRIVATE: 3703 new_local_type = MATSEQSBAIJ; 3704 break; 3705 default: 3706 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3707 break; 3708 } 3709 } else { /* by default, new_local_type is seqdense */ 3710 new_local_type = MATSEQDENSE; 3711 bs = 1; 3712 } 3713 3714 /* create MATIS object if needed */ 3715 if (reuse == MAT_INITIAL_MATRIX) { 3716 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3717 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3718 } else { 3719 /* it also destroys the local matrices */ 3720 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3721 } 3722 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3723 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3724 3725 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3726 3727 /* Global to local map of received indices */ 3728 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3729 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3730 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3731 3732 /* restore attributes -> type of incoming data and its size */ 3733 buf_size_idxs = 0; 3734 for (i=0;i<n_recvs;i++) { 3735 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3736 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3737 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3738 } 3739 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3740 3741 /* set preallocation */ 3742 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3743 if (!newisdense) { 3744 PetscInt *new_local_nnz=0; 3745 3746 ptr_vals = recv_buffer_vals; 3747 ptr_idxs = recv_buffer_idxs_local; 3748 if (n_recvs) { 3749 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3750 } 3751 for (i=0;i<n_recvs;i++) { 3752 PetscInt j; 3753 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3754 for (j=0;j<*(ptr_idxs+1);j++) { 3755 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3756 } 3757 } else { 3758 /* TODO */ 3759 } 3760 ptr_idxs += olengths_idxs[i]; 3761 } 3762 if (new_local_nnz) { 3763 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3764 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3765 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3766 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3767 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3768 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3769 } else { 3770 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3771 } 3772 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3773 } else { 3774 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3775 } 3776 3777 /* set values */ 3778 ptr_vals = recv_buffer_vals; 3779 ptr_idxs = recv_buffer_idxs_local; 3780 for (i=0;i<n_recvs;i++) { 3781 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3782 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3783 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3784 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3785 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3786 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3787 } else { 3788 /* TODO */ 3789 } 3790 ptr_idxs += olengths_idxs[i]; 3791 ptr_vals += olengths_vals[i]; 3792 } 3793 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3794 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3795 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3796 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3797 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3798 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3799 3800 #if 0 3801 if (!restrict_comm) { /* check */ 3802 Vec lvec,rvec; 3803 PetscReal infty_error; 3804 3805 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3806 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3807 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3808 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3809 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3810 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3811 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3812 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3813 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3814 } 3815 #endif 3816 3817 /* assemble new additional is (if any) */ 3818 if (nis) { 3819 PetscInt **temp_idxs,*count_is,j,psum; 3820 3821 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3822 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3823 ptr_idxs = recv_buffer_idxs_is; 3824 psum = 0; 3825 for (i=0;i<n_recvs;i++) { 3826 for (j=0;j<nis;j++) { 3827 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3828 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3829 psum += plen; 3830 ptr_idxs += plen+1; /* shift pointer to received data */ 3831 } 3832 } 3833 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3834 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3835 for (i=1;i<nis;i++) { 3836 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3837 } 3838 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3839 ptr_idxs = recv_buffer_idxs_is; 3840 for (i=0;i<n_recvs;i++) { 3841 for (j=0;j<nis;j++) { 3842 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3843 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3844 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3845 ptr_idxs += plen+1; /* shift pointer to received data */ 3846 } 3847 } 3848 for (i=0;i<nis;i++) { 3849 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3850 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3851 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3852 } 3853 ierr = PetscFree(count_is);CHKERRQ(ierr); 3854 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3855 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3856 } 3857 /* free workspace */ 3858 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3859 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3860 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3861 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3862 if (isdense) { 3863 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3864 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3865 } else { 3866 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3867 } 3868 if (nis) { 3869 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3870 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3871 } 3872 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3873 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3874 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3875 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3876 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3877 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3878 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3879 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3880 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3881 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3882 ierr = PetscFree(onodes);CHKERRQ(ierr); 3883 if (nis) { 3884 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3885 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3886 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3887 } 3888 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3889 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3890 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3891 for (i=0;i<nis;i++) { 3892 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3893 } 3894 } 3895 PetscFunctionReturn(0); 3896 } 3897 3898 /* temporary hack into ksp private data structure */ 3899 #include <petsc/private/kspimpl.h> 3900 3901 #undef __FUNCT__ 3902 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3903 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3904 { 3905 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3906 PC_IS *pcis = (PC_IS*)pc->data; 3907 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3908 MatNullSpace CoarseNullSpace=NULL; 3909 ISLocalToGlobalMapping coarse_islg; 3910 IS coarse_is,*isarray; 3911 PetscInt i,im_active=-1,active_procs=-1; 3912 PetscInt nis,nisdofs,nisneu; 3913 PC pc_temp; 3914 PCType coarse_pc_type; 3915 KSPType coarse_ksp_type; 3916 PetscBool multilevel_requested,multilevel_allowed; 3917 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3918 Mat t_coarse_mat_is; 3919 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3920 PetscMPIInt all_procs; 3921 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3922 PetscBool compute_vecs = PETSC_FALSE; 3923 PetscScalar *array; 3924 PetscErrorCode ierr; 3925 3926 PetscFunctionBegin; 3927 /* Assign global numbering to coarse dofs */ 3928 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3929 PetscInt ocoarse_size; 3930 compute_vecs = PETSC_TRUE; 3931 ocoarse_size = pcbddc->coarse_size; 3932 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3933 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3934 /* see if we can avoid some work */ 3935 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3936 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3937 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3938 coarse_reuse = PETSC_FALSE; 3939 } else { /* we can safely reuse already computed coarse matrix */ 3940 coarse_reuse = PETSC_TRUE; 3941 } 3942 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3943 coarse_reuse = PETSC_FALSE; 3944 } 3945 /* reset any subassembling information */ 3946 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3947 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3948 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3949 coarse_reuse = PETSC_TRUE; 3950 } 3951 3952 /* count "active" (i.e. with positive local size) and "void" processes */ 3953 im_active = !!(pcis->n); 3954 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3955 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3956 void_procs = all_procs-active_procs; 3957 csin_type_simple = PETSC_TRUE; 3958 redist = PETSC_FALSE; 3959 if (pcbddc->current_level && void_procs) { 3960 csin_ml = PETSC_TRUE; 3961 ncoarse_ml = void_procs; 3962 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3963 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3964 csin_ds = PETSC_TRUE; 3965 ncoarse_ds = pcbddc->redistribute_coarse; 3966 redist = PETSC_TRUE; 3967 } else { 3968 csin_ds = PETSC_TRUE; 3969 ncoarse_ds = active_procs; 3970 redist = PETSC_TRUE; 3971 } 3972 } else { 3973 csin_ml = PETSC_FALSE; 3974 ncoarse_ml = all_procs; 3975 if (void_procs) { 3976 csin_ds = PETSC_TRUE; 3977 ncoarse_ds = void_procs; 3978 csin_type_simple = PETSC_FALSE; 3979 } else { 3980 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3981 csin_ds = PETSC_TRUE; 3982 ncoarse_ds = pcbddc->redistribute_coarse; 3983 redist = PETSC_TRUE; 3984 } else { 3985 csin_ds = PETSC_FALSE; 3986 ncoarse_ds = all_procs; 3987 } 3988 } 3989 } 3990 3991 /* 3992 test if we can go multilevel: three conditions must be satisfied: 3993 - we have not exceeded the number of levels requested 3994 - we can actually subassemble the active processes 3995 - we can find a suitable number of MPI processes where we can place the subassembled problem 3996 */ 3997 multilevel_allowed = PETSC_FALSE; 3998 multilevel_requested = PETSC_FALSE; 3999 if (pcbddc->current_level < pcbddc->max_levels) { 4000 multilevel_requested = PETSC_TRUE; 4001 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4002 multilevel_allowed = PETSC_FALSE; 4003 } else { 4004 multilevel_allowed = PETSC_TRUE; 4005 } 4006 } 4007 /* determine number of process partecipating to coarse solver */ 4008 if (multilevel_allowed) { 4009 ncoarse = ncoarse_ml; 4010 csin = csin_ml; 4011 redist = PETSC_FALSE; 4012 } else { 4013 ncoarse = ncoarse_ds; 4014 csin = csin_ds; 4015 } 4016 4017 /* creates temporary l2gmap and IS for coarse indexes */ 4018 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4019 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4020 4021 /* creates temporary MATIS object for coarse matrix */ 4022 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4023 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4024 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4025 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4026 #if 0 4027 { 4028 PetscViewer viewer; 4029 char filename[256]; 4030 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4031 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4032 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4033 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4034 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4035 } 4036 #endif 4037 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4038 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4039 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4040 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4041 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4042 4043 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4044 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4045 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4046 const PetscInt *idxs; 4047 ISLocalToGlobalMapping tmap; 4048 4049 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4050 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4051 /* allocate space for temporary storage */ 4052 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4053 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4054 /* allocate for IS array */ 4055 nisdofs = pcbddc->n_ISForDofsLocal; 4056 nisneu = !!pcbddc->NeumannBoundariesLocal; 4057 nis = nisdofs + nisneu; 4058 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4059 /* dofs splitting */ 4060 for (i=0;i<nisdofs;i++) { 4061 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4062 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4063 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4064 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4065 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4066 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4067 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4068 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4069 } 4070 /* neumann boundaries */ 4071 if (pcbddc->NeumannBoundariesLocal) { 4072 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4073 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4074 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4075 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4076 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4077 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4078 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4079 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4080 } 4081 /* free memory */ 4082 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4083 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4084 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4085 } else { 4086 nis = 0; 4087 nisdofs = 0; 4088 nisneu = 0; 4089 isarray = NULL; 4090 } 4091 /* destroy no longer needed map */ 4092 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4093 4094 /* restrict on coarse candidates (if needed) */ 4095 coarse_mat_is = NULL; 4096 if (csin) { 4097 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4098 if (redist) { 4099 PetscMPIInt rank; 4100 PetscInt spc,n_spc_p1,dest[1],destsize; 4101 4102 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4103 spc = active_procs/ncoarse; 4104 n_spc_p1 = active_procs%ncoarse; 4105 if (im_active) { 4106 destsize = 1; 4107 if (rank > n_spc_p1*(spc+1)-1) { 4108 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4109 } else { 4110 dest[0] = rank/(spc+1); 4111 } 4112 } else { 4113 destsize = 0; 4114 } 4115 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4116 } else if (csin_type_simple) { 4117 PetscMPIInt rank; 4118 PetscInt issize,isidx; 4119 4120 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4121 if (im_active) { 4122 issize = 1; 4123 isidx = (PetscInt)rank; 4124 } else { 4125 issize = 0; 4126 isidx = -1; 4127 } 4128 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4129 } else { /* get a suitable subassembling pattern from MATIS code */ 4130 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4131 } 4132 4133 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4134 if (!redist || ncoarse <= void_procs) { 4135 PetscInt ncoarse_cand,tissize,*nisindices; 4136 PetscInt *coarse_candidates; 4137 const PetscInt* tisindices; 4138 4139 /* get coarse candidates' ranks in pc communicator */ 4140 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4141 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4142 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4143 if (!coarse_candidates[i]) { 4144 coarse_candidates[ncoarse_cand++]=i; 4145 } 4146 } 4147 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4148 4149 4150 if (pcbddc->dbg_flag) { 4151 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4152 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4153 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4155 for (i=0;i<ncoarse_cand;i++) { 4156 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4157 } 4158 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4159 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4160 } 4161 /* shift the pattern on coarse candidates */ 4162 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4163 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4164 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4165 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4166 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4167 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4168 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4169 } 4170 if (pcbddc->dbg_flag) { 4171 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4172 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4173 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4174 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4175 } 4176 } 4177 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4178 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4179 } else { 4180 if (pcbddc->dbg_flag) { 4181 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4182 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4183 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4184 } 4185 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4186 coarse_mat_is = t_coarse_mat_is; 4187 } 4188 4189 /* create local to global scatters for coarse problem */ 4190 if (compute_vecs) { 4191 PetscInt lrows; 4192 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4193 if (coarse_mat_is) { 4194 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4195 } else { 4196 lrows = 0; 4197 } 4198 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4199 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4200 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4201 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4202 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4203 } 4204 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4205 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4206 4207 /* set defaults for coarse KSP and PC */ 4208 if (multilevel_allowed) { 4209 coarse_ksp_type = KSPRICHARDSON; 4210 coarse_pc_type = PCBDDC; 4211 } else { 4212 coarse_ksp_type = KSPPREONLY; 4213 coarse_pc_type = PCREDUNDANT; 4214 } 4215 4216 /* print some info if requested */ 4217 if (pcbddc->dbg_flag) { 4218 if (!multilevel_allowed) { 4219 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4220 if (multilevel_requested) { 4221 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4222 } else if (pcbddc->max_levels) { 4223 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4224 } 4225 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4226 } 4227 } 4228 4229 /* create the coarse KSP object only once with defaults */ 4230 if (coarse_mat_is) { 4231 MatReuse coarse_mat_reuse; 4232 PetscViewer dbg_viewer = NULL; 4233 if (pcbddc->dbg_flag) { 4234 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4235 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4236 } 4237 if (!pcbddc->coarse_ksp) { 4238 char prefix[256],str_level[16]; 4239 size_t len; 4240 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4241 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4242 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4243 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4244 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4245 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4246 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4247 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4248 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4249 /* prefix */ 4250 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4251 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4252 if (!pcbddc->current_level) { 4253 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4254 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4255 } else { 4256 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4257 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4258 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4259 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4260 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4261 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4262 } 4263 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4264 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4265 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4266 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4267 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4268 /* allow user customization */ 4269 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4270 } 4271 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4272 if (nisdofs) { 4273 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4274 for (i=0;i<nisdofs;i++) { 4275 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4276 } 4277 } 4278 if (nisneu) { 4279 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4280 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4281 } 4282 4283 /* get some info after set from options */ 4284 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4285 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4286 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4287 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4288 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4289 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4290 isbddc = PETSC_FALSE; 4291 } 4292 if (isredundant) { 4293 KSP inner_ksp; 4294 PC inner_pc; 4295 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4296 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4297 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4298 } 4299 4300 /* assemble coarse matrix */ 4301 if (coarse_reuse) { 4302 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4303 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4304 coarse_mat_reuse = MAT_REUSE_MATRIX; 4305 } else { 4306 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4307 } 4308 if (isbddc || isnn) { 4309 if (pcbddc->coarsening_ratio > 1) { 4310 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4311 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4312 if (pcbddc->dbg_flag) { 4313 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4314 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4315 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4316 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4317 } 4318 } 4319 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4320 } else { 4321 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4322 coarse_mat = coarse_mat_is; 4323 } 4324 } else { 4325 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4326 } 4327 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4328 4329 /* propagate symmetry info to coarse matrix */ 4330 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4331 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4332 4333 /* set operators */ 4334 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4335 if (pcbddc->dbg_flag) { 4336 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4337 } 4338 } else { /* processes non partecipating to coarse solver (if any) */ 4339 coarse_mat = 0; 4340 } 4341 ierr = PetscFree(isarray);CHKERRQ(ierr); 4342 #if 0 4343 { 4344 PetscViewer viewer; 4345 char filename[256]; 4346 sprintf(filename,"coarse_mat.m"); 4347 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4348 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4349 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4350 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4351 } 4352 #endif 4353 4354 /* Compute coarse null space (special handling by BDDC only) */ 4355 if (pcbddc->NullSpace) { 4356 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4357 } 4358 4359 if (pcbddc->coarse_ksp) { 4360 Vec crhs,csol; 4361 PetscBool ispreonly; 4362 if (CoarseNullSpace) { 4363 if (isbddc) { 4364 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4365 } else { 4366 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4367 } 4368 } 4369 /* setup coarse ksp */ 4370 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4371 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4372 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4373 /* hack */ 4374 if (!csol) { 4375 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4376 } 4377 if (!crhs) { 4378 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4379 } 4380 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4381 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4382 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4383 KSP check_ksp; 4384 KSPType check_ksp_type; 4385 PC check_pc; 4386 Vec check_vec,coarse_vec; 4387 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4388 PetscInt its; 4389 PetscBool compute_eigs; 4390 PetscReal *eigs_r,*eigs_c; 4391 PetscInt neigs; 4392 const char *prefix; 4393 4394 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4395 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4396 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4397 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4398 if (ispreonly) { 4399 check_ksp_type = KSPPREONLY; 4400 compute_eigs = PETSC_FALSE; 4401 } else { 4402 check_ksp_type = KSPGMRES; 4403 compute_eigs = PETSC_TRUE; 4404 } 4405 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4406 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4407 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4408 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4409 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4410 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4411 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4412 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4413 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4414 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4415 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4416 /* create random vec */ 4417 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4418 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4419 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4420 if (CoarseNullSpace) { 4421 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4422 } 4423 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4424 /* solve coarse problem */ 4425 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4426 if (CoarseNullSpace) { 4427 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4428 } 4429 /* set eigenvalue estimation if preonly has not been requested */ 4430 if (compute_eigs) { 4431 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4432 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4433 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4434 lambda_max = eigs_r[neigs-1]; 4435 lambda_min = eigs_r[0]; 4436 if (pcbddc->use_coarse_estimates) { 4437 if (lambda_max>lambda_min) { 4438 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4439 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4440 } 4441 } 4442 } 4443 4444 /* check coarse problem residual error */ 4445 if (pcbddc->dbg_flag) { 4446 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4447 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4448 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4449 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4450 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4451 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4452 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4453 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4454 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4455 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4456 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4457 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4458 if (compute_eigs) { 4459 PetscReal lambda_max_s,lambda_min_s; 4460 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4461 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4462 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4463 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4464 for (i=0;i<neigs;i++) { 4465 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4466 } 4467 } 4468 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4469 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4470 } 4471 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4472 if (compute_eigs) { 4473 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4474 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4475 } 4476 } 4477 } 4478 /* print additional info */ 4479 if (pcbddc->dbg_flag) { 4480 /* waits until all processes reaches this point */ 4481 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4482 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4483 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4484 } 4485 4486 /* free memory */ 4487 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4488 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4489 PetscFunctionReturn(0); 4490 } 4491 4492 #undef __FUNCT__ 4493 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4494 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4495 { 4496 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4497 PC_IS* pcis = (PC_IS*)pc->data; 4498 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4499 PetscInt i,local_size,coarse_size=0; 4500 PetscInt *local_primal_indices=NULL; 4501 PetscInt *t_local_primal_indices=NULL; 4502 PetscErrorCode ierr; 4503 4504 PetscFunctionBegin; 4505 /* Compute global number of coarse dofs */ 4506 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4507 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4508 } 4509 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr); 4510 ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr); 4511 local_size = 0; 4512 for (i=0;i<pcbddc->local_primal_size_cc;i++) { 4513 PetscInt j; 4514 for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j; 4515 } 4516 ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr); 4517 if (local_size != pcbddc->local_primal_size) { 4518 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4519 } 4520 4521 /* check numbering */ 4522 if (pcbddc->dbg_flag) { 4523 PetscScalar coarsesum,*array; 4524 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4525 4526 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4527 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4528 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4529 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4530 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4531 for (i=0;i<pcbddc->local_primal_size;i++) { 4532 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4533 } 4534 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4535 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4536 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4537 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4538 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4539 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4540 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4541 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4542 for (i=0;i<pcis->n;i++) { 4543 if (array[i] == 1.0) { 4544 set_error = PETSC_TRUE; 4545 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4546 } 4547 } 4548 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4549 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4550 for (i=0;i<pcis->n;i++) { 4551 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4552 } 4553 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4554 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4555 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4556 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4557 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4558 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4559 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4560 PetscInt *gidxs; 4561 4562 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4563 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4564 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4565 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4566 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4567 for (i=0;i<pcbddc->local_primal_size;i++) { 4568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4569 } 4570 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4571 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4572 } 4573 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4574 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4575 } 4576 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4577 /* get back data */ 4578 *coarse_size_n = coarse_size; 4579 *local_primal_indices_n = local_primal_indices; 4580 PetscFunctionReturn(0); 4581 } 4582 4583 #undef __FUNCT__ 4584 #define __FUNCT__ "PCBDDCGlobalToLocal" 4585 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4586 { 4587 IS localis_t; 4588 PetscInt i,lsize,*idxs,n; 4589 PetscScalar *vals; 4590 PetscErrorCode ierr; 4591 4592 PetscFunctionBegin; 4593 /* get indices in local ordering exploiting local to global map */ 4594 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4595 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4596 for (i=0;i<lsize;i++) vals[i] = 1.0; 4597 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4598 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4599 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4600 if (idxs) { /* multilevel guard */ 4601 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4602 } 4603 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4604 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4605 ierr = PetscFree(vals);CHKERRQ(ierr); 4606 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4607 /* now compute set in local ordering */ 4608 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4609 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4610 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4611 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4612 for (i=0,lsize=0;i<n;i++) { 4613 if (PetscRealPart(vals[i]) > 0.5) { 4614 lsize++; 4615 } 4616 } 4617 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4618 for (i=0,lsize=0;i<n;i++) { 4619 if (PetscRealPart(vals[i]) > 0.5) { 4620 idxs[lsize++] = i; 4621 } 4622 } 4623 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4624 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4625 *localis = localis_t; 4626 PetscFunctionReturn(0); 4627 } 4628 4629 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4630 #undef __FUNCT__ 4631 #define __FUNCT__ "PCBDDCMatMult_Private" 4632 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4633 { 4634 PCBDDCChange_ctx change_ctx; 4635 PetscErrorCode ierr; 4636 4637 PetscFunctionBegin; 4638 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4639 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4640 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4641 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4642 PetscFunctionReturn(0); 4643 } 4644 4645 #undef __FUNCT__ 4646 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4647 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4648 { 4649 PCBDDCChange_ctx change_ctx; 4650 PetscErrorCode ierr; 4651 4652 PetscFunctionBegin; 4653 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4654 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4655 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4656 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4657 PetscFunctionReturn(0); 4658 } 4659 4660 #undef __FUNCT__ 4661 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4662 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4663 { 4664 PC_IS *pcis=(PC_IS*)pc->data; 4665 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4666 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4667 Mat S_j; 4668 PetscInt *used_xadj,*used_adjncy; 4669 PetscBool free_used_adj; 4670 PetscErrorCode ierr; 4671 4672 PetscFunctionBegin; 4673 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4674 free_used_adj = PETSC_FALSE; 4675 if (pcbddc->sub_schurs_layers == -1) { 4676 used_xadj = NULL; 4677 used_adjncy = NULL; 4678 } else { 4679 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4680 used_xadj = pcbddc->mat_graph->xadj; 4681 used_adjncy = pcbddc->mat_graph->adjncy; 4682 } else if (pcbddc->computed_rowadj) { 4683 used_xadj = pcbddc->mat_graph->xadj; 4684 used_adjncy = pcbddc->mat_graph->adjncy; 4685 } else { 4686 PetscBool flg_row=PETSC_FALSE; 4687 const PetscInt *xadj,*adjncy; 4688 PetscInt nvtxs; 4689 4690 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4691 if (flg_row) { 4692 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4693 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4694 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4695 free_used_adj = PETSC_TRUE; 4696 } else { 4697 pcbddc->sub_schurs_layers = -1; 4698 used_xadj = NULL; 4699 used_adjncy = NULL; 4700 } 4701 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4702 } 4703 } 4704 4705 /* setup sub_schurs data */ 4706 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4707 if (!sub_schurs->use_mumps) { 4708 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4709 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4710 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4711 } else { 4712 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4713 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4714 } 4715 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4716 4717 /* free adjacency */ 4718 if (free_used_adj) { 4719 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4720 } 4721 PetscFunctionReturn(0); 4722 } 4723 4724 #undef __FUNCT__ 4725 #define __FUNCT__ "PCBDDCInitSubSchurs" 4726 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4727 { 4728 PC_IS *pcis=(PC_IS*)pc->data; 4729 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4730 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4731 PCBDDCGraph graph; 4732 PetscErrorCode ierr; 4733 4734 PetscFunctionBegin; 4735 /* attach interface graph for determining subsets */ 4736 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4737 IS verticesIS; 4738 4739 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4740 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4741 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4742 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4743 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4744 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4745 /* 4746 if (pcbddc->dbg_flag) { 4747 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4748 } 4749 */ 4750 } else { 4751 graph = pcbddc->mat_graph; 4752 } 4753 4754 /* sub_schurs init */ 4755 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4756 4757 /* free graph struct */ 4758 if (pcbddc->sub_schurs_rebuild) { 4759 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4760 } 4761 PetscFunctionReturn(0); 4762 } 4763