1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 60 nmax = PetscMax(nmin,nmax); 61 allocated_S_St = PETSC_FALSE; 62 if (nmin) { 63 allocated_S_St = PETSC_TRUE; 64 } 65 66 /* allocate lapack workspace */ 67 cum = cum2 = 0; 68 maxneigs = 0; 69 for (i=0;i<sub_schurs->n_subs;i++) { 70 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 71 PetscInt n,subset_size; 72 73 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 74 n = PetscMin(subset_size,nmax); 75 cum += subset_size; 76 cum2 += subset_size*n; 77 maxneigs = PetscMax(maxneigs,n); 78 } 79 } 80 if (mss) { 81 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 82 PetscBLASInt B_itype = 1; 83 PetscBLASInt B_N = mss; 84 PetscReal zero = 0.0; 85 PetscReal eps = 0.0; /* dlamch? */ 86 87 B_lwork = -1; 88 S = NULL; 89 St = NULL; 90 eigs = NULL; 91 eigv = NULL; 92 B_iwork = NULL; 93 B_ifail = NULL; 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 PetscInt j,subset_size; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 158 const PetscInt *idxs; 159 PetscReal infty = PETSC_MAX_REAL; 160 PetscInt eigs_start = 0; 161 PetscBLASInt B_N; 162 163 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 164 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 165 if (sub_schurs->is_hermitian) { 166 PetscInt j; 167 for (j=0;j<subset_size;j++) { 168 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 /* Threshold: this is an heuristic for edges */ 183 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 184 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 185 186 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 187 PetscBLASInt B_itype = 1; 188 PetscBLASInt B_IL, B_IU; 189 PetscReal eps = -1.0; /* dlamch? */ 190 PetscInt nmin_s; 191 192 /* ask for eigenvalues larger than thresh */ 193 if (pcbddc->dbg_flag) { 194 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 195 } 196 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 197 #if defined(PETSC_USE_COMPLEX) 198 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 199 #else 200 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 201 #endif 202 ierr = PetscFPTrapPop();CHKERRQ(ierr); 203 if (B_ierr) { 204 if (B_ierr < 0 ) { 205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 206 } else if (B_ierr <= B_N) { 207 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 208 } else { 209 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 210 } 211 } 212 213 if (B_neigs > nmax) { 214 if (pcbddc->dbg_flag) { 215 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 216 } 217 eigs_start = B_neigs -nmax; 218 B_neigs = nmax; 219 } 220 221 nmin_s = PetscMin(nmin,B_N); 222 if (B_neigs < nmin_s) { 223 PetscBLASInt B_neigs2; 224 225 B_IU = B_N - B_neigs; 226 B_IL = B_N - nmin_s + 1; 227 if (pcbddc->dbg_flag) { 228 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 229 } 230 if (sub_schurs->is_hermitian) { 231 PetscInt j; 232 for (j=0;j<subset_size;j++) { 233 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 234 } 235 for (j=0;j<subset_size;j++) { 236 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 } else { 239 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 240 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 241 } 242 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 243 #if defined(PETSC_USE_COMPLEX) 244 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 245 #else 246 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 247 #endif 248 ierr = PetscFPTrapPop();CHKERRQ(ierr); 249 B_neigs += B_neigs2; 250 } 251 if (B_ierr) { 252 if (B_ierr < 0 ) { 253 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 254 } else if (B_ierr <= B_N) { 255 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 256 } else { 257 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 258 } 259 } 260 if (pcbddc->dbg_flag) { 261 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 262 for (j=0;j<B_neigs;j++) { 263 if (eigs[j] == 0.0) { 264 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 265 } else { 266 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 267 } 268 } 269 } 270 } else { 271 /* TODO */ 272 } 273 274 maxneigs = PetscMax(B_neigs,maxneigs); 275 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 276 if (B_neigs) { 277 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 278 279 if (pcbddc->dbg_flag > 1) { 280 PetscInt ii; 281 for (ii=0;ii<B_neigs;ii++) { 282 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 283 for (j=0;j<B_N;j++) { 284 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 } 286 } 287 } 288 #if 0 289 for (j=0;j<B_neigs;j++) { 290 PetscBLASInt Blas_N,Blas_one = 1.0; 291 PetscScalar norm; 292 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 293 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 294 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 295 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 296 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 297 } else { 298 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 299 } 300 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 301 } 302 #endif 303 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 304 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 305 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 306 cum++; 307 } 308 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 309 } 310 /* shift for next computation */ 311 cumarray += subset_size*subset_size; 312 } 313 if (pcbddc->dbg_flag) { 314 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 315 } 316 317 if (mss) { 318 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 319 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 320 /* destroy matrices (junk) */ 321 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 322 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 323 } 324 if (allocated_S_St) { 325 ierr = PetscFree2(S,St);CHKERRQ(ierr); 326 } 327 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 328 #if defined(PETSC_USE_COMPLEX) 329 ierr = PetscFree(rwork);CHKERRQ(ierr); 330 #endif 331 if (pcbddc->dbg_flag) { 332 PetscInt maxneigs_r; 333 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 334 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 335 } 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "PCBDDCSetUpSolvers" 341 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 342 { 343 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 344 PetscScalar *coarse_submat_vals; 345 PetscErrorCode ierr; 346 347 PetscFunctionBegin; 348 /* Setup local scatters R_to_B and (optionally) R_to_D */ 349 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 350 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 351 352 /* Setup local neumann solver ksp_R */ 353 /* PCBDDCSetUpLocalScatters should be called first! */ 354 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 355 356 /* Change global null space passed in by the user if change of basis has been requested */ 357 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 358 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 359 } 360 361 /* 362 Setup local correction and local part of coarse basis. 363 Gives back the dense local part of the coarse matrix in column major ordering 364 */ 365 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 366 367 /* Compute total number of coarse nodes and setup coarse solver */ 368 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 369 370 /* free */ 371 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "PCBDDCResetCustomization" 377 PetscErrorCode PCBDDCResetCustomization(PC pc) 378 { 379 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 384 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 385 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 386 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 387 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 388 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 389 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 390 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 392 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 393 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 #undef __FUNCT__ 398 #define __FUNCT__ "PCBDDCResetTopography" 399 PetscErrorCode PCBDDCResetTopography(PC pc) 400 { 401 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 402 PetscErrorCode ierr; 403 404 PetscFunctionBegin; 405 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 406 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 407 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 408 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 409 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetSolvers" 415 PetscErrorCode PCBDDCResetSolvers(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscScalar *array; 419 PetscErrorCode ierr; 420 421 PetscFunctionBegin; 422 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 423 if (pcbddc->coarse_phi_B) { 424 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 425 ierr = PetscFree(array);CHKERRQ(ierr); 426 } 427 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 428 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 429 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 430 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 431 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 432 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 433 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 434 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 436 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 437 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 438 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 439 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 440 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 441 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 442 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 443 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 445 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 446 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 447 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 448 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 449 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 450 PetscFunctionReturn(0); 451 } 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 455 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 456 { 457 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 458 PC_IS *pcis = (PC_IS*)pc->data; 459 VecType impVecType; 460 PetscInt n_constraints,n_R,old_size; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 if (!pcbddc->ConstraintMatrix) { 465 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 466 } 467 /* get sizes */ 468 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 469 n_R = pcis->n-pcbddc->n_vertices; 470 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 471 /* local work vectors (try to avoid unneeded work)*/ 472 /* R nodes */ 473 old_size = -1; 474 if (pcbddc->vec1_R) { 475 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 476 } 477 if (n_R != old_size) { 478 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 479 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 480 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 481 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 482 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 483 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 484 } 485 /* local primal dofs */ 486 old_size = -1; 487 if (pcbddc->vec1_P) { 488 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 489 } 490 if (pcbddc->local_primal_size != old_size) { 491 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 492 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 493 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 494 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 495 } 496 /* local explicit constraints */ 497 old_size = -1; 498 if (pcbddc->vec1_C) { 499 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 500 } 501 if (n_constraints && n_constraints != old_size) { 502 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 503 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 504 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 505 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 506 } 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "PCBDDCSetUpCorrection" 512 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 513 { 514 PetscErrorCode ierr; 515 /* pointers to pcis and pcbddc */ 516 PC_IS* pcis = (PC_IS*)pc->data; 517 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 518 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 519 /* submatrices of local problem */ 520 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 521 /* submatrices of local coarse problem */ 522 Mat S_VV,S_CV,S_VC,S_CC; 523 /* working matrices */ 524 Mat C_CR; 525 /* additional working stuff */ 526 PC pc_R; 527 Mat F; 528 PetscBool isLU,isCHOL,isILU; 529 530 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 531 PetscScalar *work; 532 PetscInt *idx_V_B; 533 PetscInt n,n_vertices,n_constraints; 534 PetscInt i,n_R,n_D,n_B; 535 PetscBool unsymmetric_check; 536 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 537 MatType impMatType; 538 /* some shortcuts to scalars */ 539 PetscScalar one=1.0,m_one=-1.0; 540 541 PetscFunctionBegin; 542 n_vertices = pcbddc->n_vertices; 543 n_constraints = pcbddc->local_primal_size-n_vertices; 544 /* Set Non-overlapping dimensions */ 545 n_B = pcis->n_B; 546 n_D = pcis->n - n_B; 547 n_R = pcis->n - n_vertices; 548 549 /* Set types for local objects needed by BDDC precondtioner */ 550 impMatType = MATSEQDENSE; 551 552 /* vertices in boundary numbering */ 553 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 554 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 555 if (i != n_vertices) { 556 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 557 } 558 559 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 560 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 564 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 565 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 566 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 568 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 569 570 unsymmetric_check = PETSC_FALSE; 571 /* allocate workspace */ 572 n = 0; 573 if (n_constraints) { 574 n += n_R*n_constraints; 575 } 576 if (n_vertices) { 577 n = PetscMax(2*n_R*n_vertices,n); 578 n = PetscMax((n_R+n_B)*n_vertices,n); 579 } 580 if (!pcbddc->issym) { 581 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 582 unsymmetric_check = PETSC_TRUE; 583 } 584 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 585 586 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 587 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 588 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 589 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 590 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 591 if (isLU || isILU || isCHOL) { 592 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 593 } else if (sub_schurs->reuse_mumps) { 594 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 595 MatFactorType type; 596 597 F = reuse_mumps->F; 598 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 599 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 600 } else { 601 F = NULL; 602 } 603 604 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 605 if (n_constraints) { 606 Mat M1,M2,M3; 607 Mat auxmat; 608 IS is_aux; 609 PetscScalar *array,*array2; 610 611 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 612 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 613 614 /* Extract constraints on R nodes: C_{CR} */ 615 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 616 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 617 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 618 619 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 620 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 621 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 622 for (i=0;i<n_constraints;i++) { 623 const PetscScalar *row_cmat_values; 624 const PetscInt *row_cmat_indices; 625 PetscInt size_of_constraint,j; 626 627 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 628 for (j=0;j<size_of_constraint;j++) { 629 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 630 } 631 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 632 } 633 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 634 if (F) { 635 Mat B; 636 637 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 638 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 639 ierr = MatDestroy(&B);CHKERRQ(ierr); 640 } else { 641 PetscScalar *marr; 642 643 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 644 for (i=0;i<n_constraints;i++) { 645 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 646 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 647 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 648 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 649 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 650 } 651 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 652 } 653 if (!pcbddc->switch_static) { 654 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 655 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 656 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 657 for (i=0;i<n_constraints;i++) { 658 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 659 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 660 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 661 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 662 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 663 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 664 } 665 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 666 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 667 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 668 } else { 669 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 670 pcbddc->local_auxmat2 = local_auxmat2_R; 671 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 672 } 673 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 674 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 675 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 676 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 677 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 678 if (isCHOL) { 679 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 680 } else { 681 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 682 } 683 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 684 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 685 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 686 ierr = MatDestroy(&M2);CHKERRQ(ierr); 687 ierr = MatDestroy(&M3);CHKERRQ(ierr); 688 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 689 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 690 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 691 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 692 ierr = MatDestroy(&M1);CHKERRQ(ierr); 693 } 694 /* Get submatrices from subdomain matrix */ 695 if (n_vertices) { 696 Mat newmat; 697 IS is_aux; 698 PetscInt ibs,mbs; 699 PetscBool issbaij; 700 701 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 702 IS tis; 703 704 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 705 ierr = ISSort(tis);CHKERRQ(ierr); 706 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 707 ierr = ISDestroy(&tis);CHKERRQ(ierr); 708 } else { 709 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 710 } 711 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 712 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 713 if (ibs != mbs) { /* need to convert to SEQAIJ */ 714 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 715 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 716 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 717 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 718 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 719 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 720 } else { 721 /* this is safe */ 722 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 723 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 724 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 725 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 726 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 727 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 728 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 729 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 730 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 731 } else { 732 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 733 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 734 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 735 } 736 /* need to convert to SeqAIJ for MatMatMult with dense matrix */ 737 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 738 } 739 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 740 } 741 742 /* Matrix of coarse basis functions (local) */ 743 if (pcbddc->coarse_phi_B) { 744 PetscInt on_B,on_primal,on_D=n_D; 745 if (pcbddc->coarse_phi_D) { 746 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 747 } 748 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 749 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 750 PetscScalar *marray; 751 752 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 753 ierr = PetscFree(marray);CHKERRQ(ierr); 754 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 755 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 756 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 757 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 758 } 759 } 760 761 if (!pcbddc->coarse_phi_B) { 762 PetscScalar *marray; 763 764 n = n_B*pcbddc->local_primal_size; 765 if (pcbddc->switch_static || pcbddc->dbg_flag) { 766 n += n_D*pcbddc->local_primal_size; 767 } 768 if (!pcbddc->issym) { 769 n *= 2; 770 } 771 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 772 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 773 n = n_B*pcbddc->local_primal_size; 774 if (pcbddc->switch_static || pcbddc->dbg_flag) { 775 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 776 n += n_D*pcbddc->local_primal_size; 777 } 778 if (!pcbddc->issym) { 779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 780 if (pcbddc->switch_static || pcbddc->dbg_flag) { 781 n = n_B*pcbddc->local_primal_size; 782 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 783 } 784 } else { 785 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 786 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 787 if (pcbddc->switch_static || pcbddc->dbg_flag) { 788 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 789 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 790 } 791 } 792 } 793 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 794 /* vertices */ 795 if (n_vertices) { 796 797 if (n_R) { 798 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 799 PetscBLASInt B_N,B_one = 1; 800 PetscScalar *x,*y; 801 802 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 803 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 804 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 805 if (F) { /* TODO could be optimized for symmetric problems */ 806 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 807 } else { 808 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 809 for (i=0;i<n_vertices;i++) { 810 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 811 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 812 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 813 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 814 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 815 } 816 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 817 } 818 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 819 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 820 if (n_constraints) { 821 Mat B; 822 823 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 824 for (i=0;i<n_vertices;i++) { 825 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 826 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 827 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 829 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 830 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 831 } 832 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 833 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 834 ierr = MatDestroy(&B);CHKERRQ(ierr); 835 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 836 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 837 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 838 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 839 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 840 ierr = MatDestroy(&B);CHKERRQ(ierr); 841 } 842 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 843 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 844 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 845 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 846 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 847 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 848 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 849 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 850 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 851 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 852 } else { 853 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 854 } 855 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 856 /* coarse basis functions */ 857 for (i=0;i<n_vertices;i++) { 858 PetscScalar *y; 859 860 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 861 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 862 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 863 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 864 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 865 y[n_B*i+idx_V_B[i]] = 1.0; 866 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 867 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 868 869 if (pcbddc->switch_static || pcbddc->dbg_flag) { 870 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 871 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 872 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 875 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 876 } 877 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 878 } 879 } 880 881 if (n_constraints) { 882 Mat B; 883 884 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 885 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 886 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 887 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 888 if (n_vertices) { 889 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 890 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 891 } else { 892 Mat S_VCt; 893 894 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 895 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 896 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 897 } 898 } 899 ierr = MatDestroy(&B);CHKERRQ(ierr); 900 /* coarse basis functions */ 901 for (i=0;i<n_constraints;i++) { 902 PetscScalar *y; 903 904 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 905 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 906 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 907 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 910 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 911 if (pcbddc->switch_static || pcbddc->dbg_flag) { 912 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 913 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 914 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 917 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 918 } 919 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 920 } 921 } 922 if (n_constraints) { 923 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 924 } 925 926 /* compute other basis functions for non-symmetric problems */ 927 if (!pcbddc->issym) { 928 929 if (n_constraints) { 930 Mat S_CCT,B_C; 931 932 /* this is a lazy thing */ 933 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 934 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 935 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 936 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 937 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 938 if (n_vertices) { 939 Mat B_V,S_VCT; 940 941 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 942 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 943 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 944 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 945 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 946 } 947 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 948 } else { /* if there are no constraints, reset work */ 949 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 950 } 951 if (n_vertices && n_R) { 952 Mat A_VRT; 953 PetscScalar *marray; 954 PetscBLASInt B_N,B_one = 1; 955 956 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 957 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 958 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 959 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 960 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 961 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 962 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 963 } 964 965 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 966 for (i=0;i<pcbddc->local_primal_size;i++) { 967 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 968 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 969 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 970 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 971 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 972 } 973 } else { 974 for (i=0;i<pcbddc->local_primal_size;i++) { 975 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 976 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 977 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 978 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 979 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 980 } 981 } 982 /* coarse basis functions */ 983 for (i=0;i<pcbddc->local_primal_size;i++) { 984 PetscScalar *y; 985 986 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 987 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 988 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 989 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 if (i<n_vertices) { 992 y[n_B*i+idx_V_B[i]] = 1.0; 993 } 994 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 995 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 996 997 if (pcbddc->switch_static || pcbddc->dbg_flag) { 998 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 999 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1003 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1004 } 1005 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1006 } 1007 } 1008 /* free memory */ 1009 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1010 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1011 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1012 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1013 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1014 ierr = PetscFree(work);CHKERRQ(ierr); 1015 if (n_vertices) { 1016 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1017 } 1018 if (n_constraints) { 1019 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1020 } 1021 /* Checking coarse_sub_mat and coarse basis functios */ 1022 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1023 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1024 if (pcbddc->dbg_flag) { 1025 Mat coarse_sub_mat; 1026 Mat AUXMAT,TM1,TM2,TM3,TM4; 1027 Mat coarse_phi_D,coarse_phi_B; 1028 Mat coarse_psi_D,coarse_psi_B; 1029 Mat A_II,A_BB,A_IB,A_BI; 1030 Mat C_B,CPHI; 1031 IS is_dummy; 1032 Vec mones; 1033 MatType checkmattype=MATSEQAIJ; 1034 PetscReal real_value; 1035 1036 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1037 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1038 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1039 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1040 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1041 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1042 if (unsymmetric_check) { 1043 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1044 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1045 } 1046 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1047 1048 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1049 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 1050 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1051 if (unsymmetric_check) { 1052 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1053 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1054 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1059 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1060 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1061 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1062 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1063 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1064 } else { 1065 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1066 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1067 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1069 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1070 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1071 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1072 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1073 } 1074 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1075 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1077 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1078 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1079 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1081 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1082 1083 /* check constraints */ 1084 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1085 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1086 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1087 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1088 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1089 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1090 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1092 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1093 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1094 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1095 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1096 1097 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1098 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1099 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1100 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1101 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1102 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1103 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1104 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1105 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1106 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1107 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1108 if (unsymmetric_check) { 1109 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1110 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1111 } 1112 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1113 } 1114 /* get back data */ 1115 *coarse_submat_vals_n = coarse_submat_vals; 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1121 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1122 { 1123 Mat *work_mat; 1124 IS isrow_s,iscol_s; 1125 PetscBool rsorted,csorted; 1126 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1131 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1132 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1133 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1134 1135 if (!rsorted) { 1136 const PetscInt *idxs; 1137 PetscInt *idxs_sorted,i; 1138 1139 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1141 for (i=0;i<rsize;i++) { 1142 idxs_perm_r[i] = i; 1143 } 1144 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1145 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1146 for (i=0;i<rsize;i++) { 1147 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1148 } 1149 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1150 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1151 } else { 1152 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1153 isrow_s = isrow; 1154 } 1155 1156 if (!csorted) { 1157 if (isrow == iscol) { 1158 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1159 iscol_s = isrow_s; 1160 } else { 1161 const PetscInt *idxs; 1162 PetscInt *idxs_sorted,i; 1163 1164 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1165 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1166 for (i=0;i<csize;i++) { 1167 idxs_perm_c[i] = i; 1168 } 1169 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1170 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1171 for (i=0;i<csize;i++) { 1172 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1173 } 1174 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1175 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1176 } 1177 } else { 1178 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1179 iscol_s = iscol; 1180 } 1181 1182 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1183 1184 if (!rsorted || !csorted) { 1185 Mat new_mat; 1186 IS is_perm_r,is_perm_c; 1187 1188 if (!rsorted) { 1189 PetscInt *idxs_r,i; 1190 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1191 for (i=0;i<rsize;i++) { 1192 idxs_r[idxs_perm_r[i]] = i; 1193 } 1194 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1195 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1196 } else { 1197 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1198 } 1199 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1200 1201 if (!csorted) { 1202 if (isrow_s == iscol_s) { 1203 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1204 is_perm_c = is_perm_r; 1205 } else { 1206 PetscInt *idxs_c,i; 1207 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1208 for (i=0;i<csize;i++) { 1209 idxs_c[idxs_perm_c[i]] = i; 1210 } 1211 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1212 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1213 } 1214 } else { 1215 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1216 } 1217 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1218 1219 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1220 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1221 work_mat[0] = new_mat; 1222 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1223 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1224 } 1225 1226 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1227 *B = work_mat[0]; 1228 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1229 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1230 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1236 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1237 { 1238 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1239 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1240 Mat new_mat; 1241 IS is_local,is_global; 1242 PetscInt local_size; 1243 PetscBool isseqaij; 1244 PetscErrorCode ierr; 1245 1246 PetscFunctionBegin; 1247 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1248 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1249 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1250 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1251 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1252 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1253 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1254 1255 /* check */ 1256 if (pcbddc->dbg_flag) { 1257 Vec x,x_change; 1258 PetscReal error; 1259 1260 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1261 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1262 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1263 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1266 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1267 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1268 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1269 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1270 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1271 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1272 ierr = VecDestroy(&x);CHKERRQ(ierr); 1273 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1274 } 1275 1276 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1277 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1278 if (isseqaij) { 1279 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1280 } else { 1281 Mat work_mat; 1282 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1283 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1284 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1285 } 1286 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1287 #if !defined(PETSC_USE_COMPLEX) 1288 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1289 #endif 1290 /* 1291 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1292 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1293 */ 1294 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1300 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1301 { 1302 PC_IS* pcis = (PC_IS*)(pc->data); 1303 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1304 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1305 IS is_B_reuse_mumps=NULL; 1306 PetscInt *idx_R_local; 1307 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1308 PetscInt vbs,bs; 1309 PetscBT bitmask=NULL; 1310 PetscErrorCode ierr; 1311 1312 PetscFunctionBegin; 1313 /* 1314 No need to setup local scatters if 1315 - primal space is unchanged 1316 AND 1317 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1318 AND 1319 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1320 */ 1321 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1322 PetscFunctionReturn(0); 1323 } 1324 /* destroy old objects */ 1325 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1326 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1327 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1328 /* Set Non-overlapping dimensions */ 1329 n_B = pcis->n_B; 1330 n_D = pcis->n - n_B; 1331 n_vertices = pcbddc->n_vertices; 1332 1333 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1334 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1335 1336 /* create auxiliary bitmask */ 1337 if (!sub_schurs->reuse_mumps) { 1338 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1339 for (i=0;i<n_vertices;i++) { 1340 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1341 } 1342 1343 for (i=0, n_R=0; i<pcis->n; i++) { 1344 if (!PetscBTLookup(bitmask,i)) { 1345 idx_R_local[n_R++] = i; 1346 } 1347 } 1348 } else { /* A different ordering and auxiliary scatters are needed we are reusing MUMPS Schur solver */ 1349 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1350 const PetscInt *idxs; 1351 PetscInt schur_size; 1352 IS list[2],tisB_N,tisR; 1353 1354 if (sub_schurs->is_dir) { 1355 list[0] = sub_schurs->is_Ej_all; 1356 list[1] = sub_schurs->is_dir; 1357 ierr = ISGlobalToLocalMappingApplyIS(pcis->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_dir,&list[1]);CHKERRQ(ierr); 1358 ierr = ISConcatenate(PETSC_COMM_SELF,2,list,&is_B_reuse_mumps);CHKERRQ(ierr); 1359 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1360 } else { 1361 ierr = ISDuplicate(sub_schurs->is_Ej_all,&is_B_reuse_mumps);CHKERRQ(ierr); 1362 } 1363 ierr = ISGetLocalSize(is_B_reuse_mumps,&schur_size);CHKERRQ(ierr); 1364 ierr = VecScatterDestroy(&reuse_mumps->correction_scatter_B);CHKERRQ(ierr); 1365 ierr = VecScatterCreate(pcis->vec1_B,is_B_reuse_mumps,reuse_mumps->solB,NULL,&reuse_mumps->correction_scatter_B);CHKERRQ(ierr); 1366 ierr = ISLocalToGlobalMappingApplyIS(sub_schurs->BtoNmap,is_B_reuse_mumps,&tisB_N);CHKERRQ(ierr); 1367 list[0] = pcis->is_I_local; 1368 list[1] = tisB_N; 1369 ierr = ISConcatenate(PETSC_COMM_SELF,2,list,&tisR);CHKERRQ(ierr); 1370 ierr = ISDestroy(&tisB_N);CHKERRQ(ierr); 1371 ierr = ISGetLocalSize(tisR,&n_R);CHKERRQ(ierr); 1372 ierr = ISGetIndices(tisR,&idxs);CHKERRQ(ierr); 1373 ierr = PetscMemcpy(idx_R_local,idxs,n_R*sizeof(PetscInt));CHKERRQ(ierr); 1374 ierr = ISRestoreIndices(tisR,&idxs);CHKERRQ(ierr); 1375 ierr = ISDestroy(&tisR);CHKERRQ(ierr); 1376 } 1377 1378 /* Block code */ 1379 vbs = 1; 1380 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1381 if (bs>1 && !(n_vertices%bs)) { 1382 PetscBool is_blocked = PETSC_TRUE; 1383 PetscInt *vary; 1384 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1385 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1386 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1387 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1388 for (i=0; i<n_vertices/bs; i++) { 1389 if (vary[i]!=0 && vary[i]!=bs) { 1390 is_blocked = PETSC_FALSE; 1391 break; 1392 } 1393 } 1394 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1395 vbs = bs; 1396 for (i=0;i<n_R/vbs;i++) { 1397 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1398 } 1399 } 1400 ierr = PetscFree(vary);CHKERRQ(ierr); 1401 } 1402 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1403 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1404 1405 /* print some info if requested */ 1406 if (pcbddc->dbg_flag) { 1407 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1408 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1409 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1413 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1414 } 1415 1416 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1417 if (!sub_schurs->reuse_mumps) { 1418 IS is_aux1,is_aux2; 1419 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1420 1421 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1422 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1423 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1424 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1425 for (i=0; i<n_D; i++) { 1426 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1427 } 1428 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1429 for (i=0, j=0; i<n_R; i++) { 1430 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1431 aux_array1[j++] = i; 1432 } 1433 } 1434 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1435 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1436 for (i=0, j=0; i<n_B; i++) { 1437 if (!PetscBTLookup(bitmask,is_indices[i])) { 1438 aux_array2[j++] = i; 1439 } 1440 } 1441 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1442 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1443 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1444 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1445 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1446 1447 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1448 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1449 for (i=0, j=0; i<n_R; i++) { 1450 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1451 aux_array1[j++] = i; 1452 } 1453 } 1454 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1455 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1456 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1457 } 1458 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1459 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1460 } else { 1461 IS tis; 1462 PetscInt schur_size; 1463 1464 ierr = ISGetLocalSize(is_B_reuse_mumps,&schur_size);CHKERRQ(ierr); 1465 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1466 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,is_B_reuse_mumps,&pcbddc->R_to_B);CHKERRQ(ierr); 1467 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1468 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1469 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1470 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1471 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1472 } 1473 } 1474 ierr = ISDestroy(&is_B_reuse_mumps);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1481 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1482 { 1483 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1484 PC_IS *pcis = (PC_IS*)pc->data; 1485 PC pc_temp; 1486 Mat A_RR; 1487 MatReuse reuse; 1488 PetscScalar m_one = -1.0; 1489 PetscReal value; 1490 PetscInt n_D,n_R,ibs,mbs; 1491 PetscBool use_exact,use_exact_reduced,issbaij; 1492 PetscErrorCode ierr; 1493 /* prefixes stuff */ 1494 char dir_prefix[256],neu_prefix[256],str_level[16]; 1495 size_t len; 1496 1497 PetscFunctionBegin; 1498 1499 /* compute prefixes */ 1500 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1501 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1502 if (!pcbddc->current_level) { 1503 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1504 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1505 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1506 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1507 } else { 1508 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1509 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1510 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1511 len -= 15; /* remove "pc_bddc_coarse_" */ 1512 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1513 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1514 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1515 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1516 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1517 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1518 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1519 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1520 } 1521 1522 /* DIRICHLET PROBLEM */ 1523 if (dirichlet) { 1524 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1525 if (pcbddc->issym) { 1526 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1527 } 1528 /* Matrix for Dirichlet problem is pcis->A_II */ 1529 n_D = pcis->n - pcis->n_B; 1530 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1531 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1532 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1533 /* default */ 1534 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1535 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1536 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1537 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1538 if (issbaij) { 1539 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1540 } else { 1541 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1542 } 1543 /* Allow user's customization */ 1544 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1545 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1546 } 1547 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1548 if (sub_schurs->reuse_mumps) { 1549 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1550 1551 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1552 } 1553 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1554 if (!n_D) { 1555 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1556 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1557 } 1558 /* Set Up KSP for Dirichlet problem of BDDC */ 1559 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1560 /* set ksp_D into pcis data */ 1561 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1562 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1563 pcis->ksp_D = pcbddc->ksp_D; 1564 } 1565 1566 /* NEUMANN PROBLEM */ 1567 A_RR = 0; 1568 if (neumann) { 1569 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1570 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1571 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1572 if (pcbddc->ksp_R) { /* already created ksp */ 1573 PetscInt nn_R; 1574 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1575 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1576 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1577 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1578 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1580 reuse = MAT_INITIAL_MATRIX; 1581 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1582 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1583 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1584 reuse = MAT_INITIAL_MATRIX; 1585 } else { /* safe to reuse the matrix */ 1586 reuse = MAT_REUSE_MATRIX; 1587 } 1588 } 1589 /* last check */ 1590 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1591 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1592 reuse = MAT_INITIAL_MATRIX; 1593 } 1594 } else { /* first time, so we need to create the matrix */ 1595 reuse = MAT_INITIAL_MATRIX; 1596 } 1597 /* extract A_RR */ 1598 if (!sub_schurs->reuse_mumps) { 1599 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1600 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1601 if (ibs != mbs) { 1602 Mat newmat; 1603 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1604 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1605 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1606 } else { 1607 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1608 } 1609 if (pcbddc->issym) { 1610 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1611 } 1612 } else { 1613 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1614 1615 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1616 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1617 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1618 } 1619 if (!pcbddc->ksp_R) { /* create object if not present */ 1620 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1621 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1622 /* default */ 1623 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1624 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1625 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1626 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1627 if (issbaij) { 1628 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1629 } else { 1630 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1631 } 1632 /* Allow user's customization */ 1633 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1634 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1635 } 1636 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1637 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1638 if (!n_R) { 1639 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1640 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1641 } 1642 /* Reuse MUMPS solver if it is present */ 1643 if (sub_schurs->reuse_mumps) { 1644 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1645 1646 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1647 } 1648 /* Set Up KSP for Neumann problem of BDDC */ 1649 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1650 } 1651 /* free Neumann problem's matrix */ 1652 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1653 1654 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1655 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1656 if (pcbddc->dbg_flag) { 1657 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1658 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1660 } 1661 if (dirichlet) { /* Dirichlet */ 1662 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1663 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1664 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1665 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1666 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1667 /* need to be adapted? */ 1668 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1669 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1670 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1671 /* print info */ 1672 if (pcbddc->dbg_flag) { 1673 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1674 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1675 } 1676 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1677 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1678 } 1679 } 1680 if (neumann) { /* Neumann */ 1681 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1682 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1683 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1684 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1685 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1686 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1687 /* need to be adapted? */ 1688 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1689 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1690 /* print info */ 1691 if (pcbddc->dbg_flag) { 1692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1693 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1694 } 1695 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1696 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1697 } 1698 } 1699 } 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1705 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1706 { 1707 PetscErrorCode ierr; 1708 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1709 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1710 1711 PetscFunctionBegin; 1712 if (!sub_schurs->reuse_mumps) { 1713 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1714 } 1715 if (!pcbddc->switch_static) { 1716 if (applytranspose && pcbddc->local_auxmat1) { 1717 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1718 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1719 } 1720 if (!sub_schurs->reuse_mumps) { 1721 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1722 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1723 } else { 1724 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1725 1726 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhsB,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1727 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhsB,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1728 } 1729 } else { 1730 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1731 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1732 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1733 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1734 if (applytranspose && pcbddc->local_auxmat1) { 1735 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1736 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1737 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1738 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1739 } 1740 } 1741 if (!sub_schurs->reuse_mumps) { 1742 if (applytranspose) { 1743 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1744 } else { 1745 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1746 } 1747 } else { 1748 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1749 1750 if (applytranspose) { 1751 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhsB,reuse_mumps->solB);CHKERRQ(ierr); 1752 } else { 1753 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhsB,reuse_mumps->solB);CHKERRQ(ierr); 1754 } 1755 } 1756 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1757 if (!pcbddc->switch_static) { 1758 if (!sub_schurs->reuse_mumps) { 1759 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1760 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1761 } else { 1762 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1763 1764 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->solB,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1765 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->solB,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1766 } 1767 if (!applytranspose && pcbddc->local_auxmat1) { 1768 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1769 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1770 } 1771 } else { 1772 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1773 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1774 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1775 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1776 if (!applytranspose && pcbddc->local_auxmat1) { 1777 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1778 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1779 } 1780 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1781 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1782 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1783 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1784 } 1785 PetscFunctionReturn(0); 1786 } 1787 1788 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1789 #undef __FUNCT__ 1790 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1791 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1792 { 1793 PetscErrorCode ierr; 1794 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1795 PC_IS* pcis = (PC_IS*) (pc->data); 1796 const PetscScalar zero = 0.0; 1797 1798 PetscFunctionBegin; 1799 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1800 if (applytranspose) { 1801 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1802 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1803 } else { 1804 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1805 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1806 } 1807 /* start communications from local primal nodes to rhs of coarse solver */ 1808 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1809 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1810 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1811 1812 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1813 /* TODO remove null space when doing multilevel */ 1814 if (pcbddc->coarse_ksp) { 1815 Vec rhs,sol; 1816 1817 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1818 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1819 if (applytranspose) { 1820 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1821 } else { 1822 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1823 } 1824 } 1825 1826 /* Local solution on R nodes */ 1827 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1828 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1829 } 1830 1831 /* communications from coarse sol to local primal nodes */ 1832 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1833 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1834 1835 /* Sum contributions from two levels */ 1836 if (applytranspose) { 1837 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1838 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1839 } else { 1840 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1841 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1842 } 1843 PetscFunctionReturn(0); 1844 } 1845 1846 #undef __FUNCT__ 1847 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1848 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1849 { 1850 PetscErrorCode ierr; 1851 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1852 PetscScalar *array; 1853 Vec from,to; 1854 1855 PetscFunctionBegin; 1856 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1857 from = pcbddc->coarse_vec; 1858 to = pcbddc->vec1_P; 1859 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1860 Vec tvec; 1861 1862 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1863 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1864 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1865 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1866 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1867 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1868 } 1869 } else { /* from local to global -> put data in coarse right hand side */ 1870 from = pcbddc->vec1_P; 1871 to = pcbddc->coarse_vec; 1872 } 1873 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1874 PetscFunctionReturn(0); 1875 } 1876 1877 #undef __FUNCT__ 1878 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1879 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1880 { 1881 PetscErrorCode ierr; 1882 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1883 PetscScalar *array; 1884 Vec from,to; 1885 1886 PetscFunctionBegin; 1887 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1888 from = pcbddc->coarse_vec; 1889 to = pcbddc->vec1_P; 1890 } else { /* from local to global -> put data in coarse right hand side */ 1891 from = pcbddc->vec1_P; 1892 to = pcbddc->coarse_vec; 1893 } 1894 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1895 if (smode == SCATTER_FORWARD) { 1896 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1897 Vec tvec; 1898 1899 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1900 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1901 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1902 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1903 } 1904 } else { 1905 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1906 ierr = VecResetArray(from);CHKERRQ(ierr); 1907 } 1908 } 1909 PetscFunctionReturn(0); 1910 } 1911 1912 /* uncomment for testing purposes */ 1913 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1916 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1917 { 1918 PetscErrorCode ierr; 1919 PC_IS* pcis = (PC_IS*)(pc->data); 1920 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1921 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1922 /* one and zero */ 1923 PetscScalar one=1.0,zero=0.0; 1924 /* space to store constraints and their local indices */ 1925 PetscScalar *constraints_data; 1926 PetscInt *constraints_idxs,*constraints_idxs_B; 1927 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1928 PetscInt *constraints_n; 1929 /* iterators */ 1930 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1931 /* BLAS integers */ 1932 PetscBLASInt lwork,lierr; 1933 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1934 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1935 /* reuse */ 1936 PetscInt olocal_primal_size,olocal_primal_size_cc; 1937 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1938 /* change of basis */ 1939 PetscBool qr_needed; 1940 PetscBT change_basis,qr_needed_idx; 1941 /* auxiliary stuff */ 1942 PetscInt *nnz,*is_indices; 1943 PetscInt ncc; 1944 /* some quantities */ 1945 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1946 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1947 1948 PetscFunctionBegin; 1949 /* Destroy Mat objects computed previously */ 1950 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1951 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1952 /* save info on constraints from previous setup (if any) */ 1953 olocal_primal_size = pcbddc->local_primal_size; 1954 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1955 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1956 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1957 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1958 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1959 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1960 1961 /* print some info */ 1962 if (pcbddc->dbg_flag) { 1963 IS vertices; 1964 PetscInt nv,nedges,nfaces; 1965 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1966 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1967 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1968 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1969 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1971 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1972 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1973 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1974 } 1975 1976 if (!pcbddc->adaptive_selection) { 1977 IS ISForVertices,*ISForFaces,*ISForEdges; 1978 MatNullSpace nearnullsp; 1979 const Vec *nearnullvecs; 1980 Vec *localnearnullsp; 1981 PetscScalar *array; 1982 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1983 PetscBool nnsp_has_cnst; 1984 /* LAPACK working arrays for SVD or POD */ 1985 PetscBool skip_lapack,boolforchange; 1986 PetscScalar *work; 1987 PetscReal *singular_vals; 1988 #if defined(PETSC_USE_COMPLEX) 1989 PetscReal *rwork; 1990 #endif 1991 #if defined(PETSC_MISSING_LAPACK_GESVD) 1992 PetscScalar *temp_basis,*correlation_mat; 1993 #else 1994 PetscBLASInt dummy_int=1; 1995 PetscScalar dummy_scalar=1.; 1996 #endif 1997 1998 /* Get index sets for faces, edges and vertices from graph */ 1999 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2000 /* free unneeded index sets */ 2001 if (!pcbddc->use_vertices) { 2002 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2003 } 2004 if (!pcbddc->use_edges) { 2005 for (i=0;i<n_ISForEdges;i++) { 2006 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2007 } 2008 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2009 n_ISForEdges = 0; 2010 } 2011 if (!pcbddc->use_faces) { 2012 for (i=0;i<n_ISForFaces;i++) { 2013 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2014 } 2015 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2016 n_ISForFaces = 0; 2017 } 2018 2019 #if defined(PETSC_USE_DEBUG) 2020 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2021 Also use_change_of_basis should be consistent among processors */ 2022 if (pcbddc->NullSpace) { 2023 PetscBool tbool[2],gbool[2]; 2024 2025 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2026 pcbddc->use_change_of_basis = PETSC_TRUE; 2027 if (!ISForEdges) { 2028 pcbddc->use_change_on_faces = PETSC_TRUE; 2029 } 2030 } 2031 tbool[0] = pcbddc->use_change_of_basis; 2032 tbool[1] = pcbddc->use_change_on_faces; 2033 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2034 pcbddc->use_change_of_basis = gbool[0]; 2035 pcbddc->use_change_on_faces = gbool[1]; 2036 } 2037 #endif 2038 2039 /* check if near null space is attached to global mat */ 2040 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2041 if (nearnullsp) { 2042 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2043 /* remove any stored info */ 2044 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2045 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2046 /* store information for BDDC solver reuse */ 2047 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2048 pcbddc->onearnullspace = nearnullsp; 2049 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2050 for (i=0;i<nnsp_size;i++) { 2051 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2052 } 2053 } else { /* if near null space is not provided BDDC uses constants by default */ 2054 nnsp_size = 0; 2055 nnsp_has_cnst = PETSC_TRUE; 2056 } 2057 /* get max number of constraints on a single cc */ 2058 max_constraints = nnsp_size; 2059 if (nnsp_has_cnst) max_constraints++; 2060 2061 /* 2062 Evaluate maximum storage size needed by the procedure 2063 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2064 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2065 There can be multiple constraints per connected component 2066 */ 2067 n_vertices = 0; 2068 if (ISForVertices) { 2069 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2070 } 2071 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2072 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2073 2074 total_counts = n_ISForFaces+n_ISForEdges; 2075 total_counts *= max_constraints; 2076 total_counts += n_vertices; 2077 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2078 2079 total_counts = 0; 2080 max_size_of_constraint = 0; 2081 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2082 IS used_is; 2083 if (i<n_ISForEdges) { 2084 used_is = ISForEdges[i]; 2085 } else { 2086 used_is = ISForFaces[i-n_ISForEdges]; 2087 } 2088 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2089 total_counts += j; 2090 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2091 } 2092 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2093 2094 /* get local part of global near null space vectors */ 2095 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2096 for (k=0;k<nnsp_size;k++) { 2097 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2098 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2099 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2100 } 2101 2102 /* whether or not to skip lapack calls */ 2103 skip_lapack = PETSC_TRUE; 2104 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2105 2106 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2107 if (!skip_lapack) { 2108 PetscScalar temp_work; 2109 2110 #if defined(PETSC_MISSING_LAPACK_GESVD) 2111 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2112 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2113 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2114 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2115 #if defined(PETSC_USE_COMPLEX) 2116 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2117 #endif 2118 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2119 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2120 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2121 lwork = -1; 2122 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2123 #if !defined(PETSC_USE_COMPLEX) 2124 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2125 #else 2126 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2127 #endif 2128 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2129 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2130 #else /* on missing GESVD */ 2131 /* SVD */ 2132 PetscInt max_n,min_n; 2133 max_n = max_size_of_constraint; 2134 min_n = max_constraints; 2135 if (max_size_of_constraint < max_constraints) { 2136 min_n = max_size_of_constraint; 2137 max_n = max_constraints; 2138 } 2139 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2140 #if defined(PETSC_USE_COMPLEX) 2141 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2142 #endif 2143 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2144 lwork = -1; 2145 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2146 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2147 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2148 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2149 #if !defined(PETSC_USE_COMPLEX) 2150 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2151 #else 2152 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2153 #endif 2154 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2155 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2156 #endif /* on missing GESVD */ 2157 /* Allocate optimal workspace */ 2158 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2159 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2160 } 2161 /* Now we can loop on constraining sets */ 2162 total_counts = 0; 2163 constraints_idxs_ptr[0] = 0; 2164 constraints_data_ptr[0] = 0; 2165 /* vertices */ 2166 if (n_vertices) { 2167 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2168 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2169 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2170 for (i=0;i<n_vertices;i++) { 2171 constraints_n[total_counts] = 1; 2172 constraints_data[total_counts] = 1.0; 2173 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2174 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2175 total_counts++; 2176 } 2177 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2178 PetscBool used_vertex; 2179 for (i=0;i<n_vertices;i++) { 2180 used_vertex = PETSC_FALSE; 2181 k = 0; 2182 while (!used_vertex && k<nnsp_size) { 2183 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2184 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2185 constraints_n[total_counts] = 1; 2186 constraints_idxs[total_counts] = is_indices[i]; 2187 constraints_data[total_counts] = 1.0; 2188 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2189 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2190 total_counts++; 2191 used_vertex = PETSC_TRUE; 2192 } 2193 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2194 k++; 2195 } 2196 } 2197 } 2198 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2199 n_vertices = total_counts; 2200 } 2201 2202 /* edges and faces */ 2203 total_counts_cc = total_counts; 2204 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2205 IS used_is; 2206 PetscBool idxs_copied = PETSC_FALSE; 2207 2208 if (ncc<n_ISForEdges) { 2209 used_is = ISForEdges[ncc]; 2210 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2211 } else { 2212 used_is = ISForFaces[ncc-n_ISForEdges]; 2213 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2214 } 2215 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2216 2217 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2218 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2219 /* change of basis should not be performed on local periodic nodes */ 2220 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2221 if (nnsp_has_cnst) { 2222 PetscScalar quad_value; 2223 2224 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2225 idxs_copied = PETSC_TRUE; 2226 2227 if (!pcbddc->use_nnsp_true) { 2228 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2229 } else { 2230 quad_value = 1.0; 2231 } 2232 for (j=0;j<size_of_constraint;j++) { 2233 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2234 } 2235 temp_constraints++; 2236 total_counts++; 2237 } 2238 for (k=0;k<nnsp_size;k++) { 2239 PetscReal real_value; 2240 PetscScalar *ptr_to_data; 2241 2242 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2243 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2244 for (j=0;j<size_of_constraint;j++) { 2245 ptr_to_data[j] = array[is_indices[j]]; 2246 } 2247 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2248 /* check if array is null on the connected component */ 2249 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2250 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2251 if (real_value > 0.0) { /* keep indices and values */ 2252 temp_constraints++; 2253 total_counts++; 2254 if (!idxs_copied) { 2255 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2256 idxs_copied = PETSC_TRUE; 2257 } 2258 } 2259 } 2260 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2261 valid_constraints = temp_constraints; 2262 if (!pcbddc->use_nnsp_true && temp_constraints) { 2263 if (temp_constraints == 1) { /* just normalize the constraint */ 2264 PetscScalar norm,*ptr_to_data; 2265 2266 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2267 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2268 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2269 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2270 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2271 } else { /* perform SVD */ 2272 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2273 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2274 2275 #if defined(PETSC_MISSING_LAPACK_GESVD) 2276 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2277 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2278 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2279 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2280 from that computed using LAPACKgesvd 2281 -> This is due to a different computation of eigenvectors in LAPACKheev 2282 -> The quality of the POD-computed basis will be the same */ 2283 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2284 /* Store upper triangular part of correlation matrix */ 2285 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2286 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2287 for (j=0;j<temp_constraints;j++) { 2288 for (k=0;k<j+1;k++) { 2289 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2290 } 2291 } 2292 /* compute eigenvalues and eigenvectors of correlation matrix */ 2293 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2294 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2295 #if !defined(PETSC_USE_COMPLEX) 2296 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2297 #else 2298 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2299 #endif 2300 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2301 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2302 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2303 j = 0; 2304 while (j < temp_constraints && singular_vals[j] < tol) j++; 2305 total_counts = total_counts-j; 2306 valid_constraints = temp_constraints-j; 2307 /* scale and copy POD basis into used quadrature memory */ 2308 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2309 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2310 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2311 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2312 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2313 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2314 if (j<temp_constraints) { 2315 PetscInt ii; 2316 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2317 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2318 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2319 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2320 for (k=0;k<temp_constraints-j;k++) { 2321 for (ii=0;ii<size_of_constraint;ii++) { 2322 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2323 } 2324 } 2325 } 2326 #else /* on missing GESVD */ 2327 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2328 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2329 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2330 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2331 #if !defined(PETSC_USE_COMPLEX) 2332 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2333 #else 2334 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2335 #endif 2336 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2337 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2338 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2339 k = temp_constraints; 2340 if (k > size_of_constraint) k = size_of_constraint; 2341 j = 0; 2342 while (j < k && singular_vals[k-j-1] < tol) j++; 2343 valid_constraints = k-j; 2344 total_counts = total_counts-temp_constraints+valid_constraints; 2345 #endif /* on missing GESVD */ 2346 } 2347 } 2348 /* update pointers information */ 2349 if (valid_constraints) { 2350 constraints_n[total_counts_cc] = valid_constraints; 2351 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2352 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2353 /* set change_of_basis flag */ 2354 if (boolforchange) { 2355 PetscBTSet(change_basis,total_counts_cc); 2356 } 2357 total_counts_cc++; 2358 } 2359 } 2360 /* free workspace */ 2361 if (!skip_lapack) { 2362 ierr = PetscFree(work);CHKERRQ(ierr); 2363 #if defined(PETSC_USE_COMPLEX) 2364 ierr = PetscFree(rwork);CHKERRQ(ierr); 2365 #endif 2366 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2367 #if defined(PETSC_MISSING_LAPACK_GESVD) 2368 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2369 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2370 #endif 2371 } 2372 for (k=0;k<nnsp_size;k++) { 2373 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2374 } 2375 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2376 /* free index sets of faces, edges and vertices */ 2377 for (i=0;i<n_ISForFaces;i++) { 2378 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2379 } 2380 if (n_ISForFaces) { 2381 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2382 } 2383 for (i=0;i<n_ISForEdges;i++) { 2384 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2385 } 2386 if (n_ISForEdges) { 2387 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2388 } 2389 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2390 } else { 2391 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2392 2393 total_counts = 0; 2394 n_vertices = 0; 2395 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2396 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2397 } 2398 max_constraints = 0; 2399 total_counts_cc = 0; 2400 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2401 total_counts += pcbddc->adaptive_constraints_n[i]; 2402 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2403 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2404 } 2405 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2406 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2407 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2408 constraints_data = pcbddc->adaptive_constraints_data; 2409 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2410 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2411 total_counts_cc = 0; 2412 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2413 if (pcbddc->adaptive_constraints_n[i]) { 2414 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2415 } 2416 } 2417 #if 0 2418 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2419 for (i=0;i<total_counts_cc;i++) { 2420 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2421 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2422 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2423 printf(" %d",constraints_idxs[j]); 2424 } 2425 printf("\n"); 2426 printf("number of cc: %d\n",constraints_n[i]); 2427 } 2428 for (i=0;i<n_vertices;i++) { 2429 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2430 } 2431 for (i=0;i<sub_schurs->n_subs;i++) { 2432 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2433 } 2434 #endif 2435 2436 max_size_of_constraint = 0; 2437 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2438 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2439 /* Change of basis */ 2440 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2441 if (pcbddc->use_change_of_basis) { 2442 for (i=0;i<sub_schurs->n_subs;i++) { 2443 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2444 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2445 } 2446 } 2447 } 2448 } 2449 pcbddc->local_primal_size = total_counts; 2450 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2451 2452 /* map constraints_idxs in boundary numbering */ 2453 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2454 if (i != constraints_idxs_ptr[total_counts_cc]) { 2455 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2456 } 2457 2458 /* Create constraint matrix */ 2459 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2460 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2461 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2462 2463 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2464 /* determine if a QR strategy is needed for change of basis */ 2465 qr_needed = PETSC_FALSE; 2466 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2467 total_primal_vertices=0; 2468 pcbddc->local_primal_size_cc = 0; 2469 for (i=0;i<total_counts_cc;i++) { 2470 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2471 if (size_of_constraint == 1) { 2472 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2473 pcbddc->local_primal_size_cc += 1; 2474 } else if (PetscBTLookup(change_basis,i)) { 2475 for (k=0;k<constraints_n[i];k++) { 2476 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2477 } 2478 pcbddc->local_primal_size_cc += constraints_n[i]; 2479 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2480 PetscBTSet(qr_needed_idx,i); 2481 qr_needed = PETSC_TRUE; 2482 } 2483 } else { 2484 pcbddc->local_primal_size_cc += 1; 2485 } 2486 } 2487 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2488 pcbddc->n_vertices = total_primal_vertices; 2489 /* permute indices in order to have a sorted set of vertices */ 2490 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2491 2492 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2493 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2494 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2495 2496 /* nonzero structure of constraint matrix */ 2497 /* and get reference dof for local constraints */ 2498 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2499 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2500 2501 j = total_primal_vertices; 2502 total_counts = total_primal_vertices; 2503 cum = total_primal_vertices; 2504 for (i=n_vertices;i<total_counts_cc;i++) { 2505 if (!PetscBTLookup(change_basis,i)) { 2506 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2507 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2508 cum++; 2509 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2510 for (k=0;k<constraints_n[i];k++) { 2511 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2512 nnz[j+k] = size_of_constraint; 2513 } 2514 j += constraints_n[i]; 2515 } 2516 } 2517 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2518 ierr = PetscFree(nnz);CHKERRQ(ierr); 2519 2520 /* set values in constraint matrix */ 2521 for (i=0;i<total_primal_vertices;i++) { 2522 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2523 } 2524 total_counts = total_primal_vertices; 2525 for (i=n_vertices;i<total_counts_cc;i++) { 2526 if (!PetscBTLookup(change_basis,i)) { 2527 PetscInt *cols; 2528 2529 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2530 cols = constraints_idxs+constraints_idxs_ptr[i]; 2531 for (k=0;k<constraints_n[i];k++) { 2532 PetscInt row = total_counts+k; 2533 PetscScalar *vals; 2534 2535 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2536 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2537 } 2538 total_counts += constraints_n[i]; 2539 } 2540 } 2541 /* assembling */ 2542 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2543 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2544 2545 /* 2546 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2547 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2548 */ 2549 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2550 if (pcbddc->use_change_of_basis) { 2551 /* dual and primal dofs on a single cc */ 2552 PetscInt dual_dofs,primal_dofs; 2553 /* working stuff for GEQRF */ 2554 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2555 PetscBLASInt lqr_work; 2556 /* working stuff for UNGQR */ 2557 PetscScalar *gqr_work,lgqr_work_t; 2558 PetscBLASInt lgqr_work; 2559 /* working stuff for TRTRS */ 2560 PetscScalar *trs_rhs; 2561 PetscBLASInt Blas_NRHS; 2562 /* pointers for values insertion into change of basis matrix */ 2563 PetscInt *start_rows,*start_cols; 2564 PetscScalar *start_vals; 2565 /* working stuff for values insertion */ 2566 PetscBT is_primal; 2567 PetscInt *aux_primal_numbering_B; 2568 /* matrix sizes */ 2569 PetscInt global_size,local_size; 2570 /* temporary change of basis */ 2571 Mat localChangeOfBasisMatrix; 2572 /* extra space for debugging */ 2573 PetscScalar *dbg_work; 2574 2575 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2576 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2577 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2578 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2579 /* nonzeros for local mat */ 2580 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2581 for (i=0;i<pcis->n;i++) nnz[i]=1; 2582 for (i=n_vertices;i<total_counts_cc;i++) { 2583 if (PetscBTLookup(change_basis,i)) { 2584 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2585 if (PetscBTLookup(qr_needed_idx,i)) { 2586 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2587 } else { 2588 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2589 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2590 } 2591 } 2592 } 2593 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2594 ierr = PetscFree(nnz);CHKERRQ(ierr); 2595 /* Set initial identity in the matrix */ 2596 for (i=0;i<pcis->n;i++) { 2597 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2598 } 2599 2600 if (pcbddc->dbg_flag) { 2601 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2602 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2603 } 2604 2605 2606 /* Now we loop on the constraints which need a change of basis */ 2607 /* 2608 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2609 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2610 2611 Basic blocks of change of basis matrix T computed by 2612 2613 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2614 2615 | 1 0 ... 0 s_1/S | 2616 | 0 1 ... 0 s_2/S | 2617 | ... | 2618 | 0 ... 1 s_{n-1}/S | 2619 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2620 2621 with S = \sum_{i=1}^n s_i^2 2622 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2623 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2624 2625 - QR decomposition of constraints otherwise 2626 */ 2627 if (qr_needed) { 2628 /* space to store Q */ 2629 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2630 /* first we issue queries for optimal work */ 2631 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2632 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2633 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2634 lqr_work = -1; 2635 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2636 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2637 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2638 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2639 lgqr_work = -1; 2640 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2641 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2642 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2643 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2644 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2645 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2646 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2647 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2648 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2649 /* array to store scaling factors for reflectors */ 2650 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2651 /* array to store rhs and solution of triangular solver */ 2652 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2653 /* allocating workspace for check */ 2654 if (pcbddc->dbg_flag) { 2655 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2656 } 2657 } 2658 /* array to store whether a node is primal or not */ 2659 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2660 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2661 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2662 if (i != total_primal_vertices) { 2663 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2664 } 2665 for (i=0;i<total_primal_vertices;i++) { 2666 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2667 } 2668 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2669 2670 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2671 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2672 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2673 if (PetscBTLookup(change_basis,total_counts)) { 2674 /* get constraint info */ 2675 primal_dofs = constraints_n[total_counts]; 2676 dual_dofs = size_of_constraint-primal_dofs; 2677 2678 if (pcbddc->dbg_flag) { 2679 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2680 } 2681 2682 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2683 2684 /* copy quadrature constraints for change of basis check */ 2685 if (pcbddc->dbg_flag) { 2686 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2687 } 2688 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2689 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2690 2691 /* compute QR decomposition of constraints */ 2692 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2693 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2694 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2695 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2696 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2697 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2698 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2699 2700 /* explictly compute R^-T */ 2701 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2702 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2703 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2704 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2705 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2706 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2707 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2708 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2709 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2710 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2711 2712 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2713 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2714 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2715 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2716 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2717 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2718 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2719 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2720 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2721 2722 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2723 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2724 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2725 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2726 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2727 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2728 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2729 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2731 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2732 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2733 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2734 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2735 2736 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2737 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2738 /* insert cols for primal dofs */ 2739 for (j=0;j<primal_dofs;j++) { 2740 start_vals = &qr_basis[j*size_of_constraint]; 2741 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2742 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2743 } 2744 /* insert cols for dual dofs */ 2745 for (j=0,k=0;j<dual_dofs;k++) { 2746 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2747 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2748 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2749 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2750 j++; 2751 } 2752 } 2753 2754 /* check change of basis */ 2755 if (pcbddc->dbg_flag) { 2756 PetscInt ii,jj; 2757 PetscBool valid_qr=PETSC_TRUE; 2758 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2759 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2760 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2761 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2762 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2763 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2764 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2765 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2766 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2767 for (jj=0;jj<size_of_constraint;jj++) { 2768 for (ii=0;ii<primal_dofs;ii++) { 2769 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2770 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2771 } 2772 } 2773 if (!valid_qr) { 2774 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2775 for (jj=0;jj<size_of_constraint;jj++) { 2776 for (ii=0;ii<primal_dofs;ii++) { 2777 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2778 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2779 } 2780 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2781 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2782 } 2783 } 2784 } 2785 } else { 2786 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2787 } 2788 } 2789 } else { /* simple transformation block */ 2790 PetscInt row,col; 2791 PetscScalar val,norm; 2792 2793 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2794 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2795 for (j=0;j<size_of_constraint;j++) { 2796 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2797 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2798 if (!PetscBTLookup(is_primal,row_B)) { 2799 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2800 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2801 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2802 } else { 2803 for (k=0;k<size_of_constraint;k++) { 2804 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2805 if (row != col) { 2806 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2807 } else { 2808 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2809 } 2810 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2811 } 2812 } 2813 } 2814 if (pcbddc->dbg_flag) { 2815 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2816 } 2817 } 2818 } else { 2819 if (pcbddc->dbg_flag) { 2820 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2821 } 2822 } 2823 } 2824 2825 /* free workspace */ 2826 if (qr_needed) { 2827 if (pcbddc->dbg_flag) { 2828 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2829 } 2830 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2831 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2832 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2833 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2834 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2835 } 2836 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2837 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2838 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2839 2840 /* assembling of global change of variable */ 2841 { 2842 Mat tmat; 2843 PetscInt bs; 2844 2845 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2846 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2847 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2848 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2849 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2850 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2851 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2852 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2853 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2854 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2855 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2856 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2857 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2858 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2859 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2860 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2861 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2862 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2863 } 2864 /* check */ 2865 if (pcbddc->dbg_flag) { 2866 PetscReal error; 2867 Vec x,x_change; 2868 2869 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2870 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2871 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2872 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2873 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2874 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2875 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2876 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2877 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2878 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2879 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2880 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2881 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2882 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2883 ierr = VecDestroy(&x);CHKERRQ(ierr); 2884 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2885 } 2886 2887 /* adapt sub_schurs computed (if any) */ 2888 if (pcbddc->use_deluxe_scaling) { 2889 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2890 if (sub_schurs->S_Ej_all) { 2891 Mat S_new,tmat; 2892 IS is_all_N; 2893 2894 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2895 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2896 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2897 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2898 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2899 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2900 sub_schurs->S_Ej_all = S_new; 2901 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2902 if (sub_schurs->sum_S_Ej_all) { 2903 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2904 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2905 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2906 sub_schurs->sum_S_Ej_all = S_new; 2907 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2908 } 2909 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2910 } 2911 } 2912 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2913 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2914 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2915 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2916 } 2917 2918 /* set up change of basis context */ 2919 if (pcbddc->ChangeOfBasisMatrix) { 2920 PCBDDCChange_ctx change_ctx; 2921 2922 if (!pcbddc->new_global_mat) { 2923 PetscInt global_size,local_size; 2924 2925 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2926 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2927 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2928 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2929 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2930 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2931 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2932 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2933 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2934 } else { 2935 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2936 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2937 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2938 } 2939 if (!pcbddc->user_ChangeOfBasisMatrix) { 2940 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2941 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2942 } else { 2943 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2944 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2945 } 2946 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2947 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2948 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2949 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2950 } 2951 2952 /* check if a new primal space has been introduced */ 2953 pcbddc->new_primal_space_local = PETSC_TRUE; 2954 if (olocal_primal_size == pcbddc->local_primal_size) { 2955 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2956 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2957 if (!pcbddc->new_primal_space_local) { 2958 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2959 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2960 } 2961 } 2962 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2963 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2964 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2965 2966 /* flush dbg viewer */ 2967 if (pcbddc->dbg_flag) { 2968 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2969 } 2970 2971 /* free workspace */ 2972 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2973 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2974 if (!pcbddc->adaptive_selection) { 2975 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2976 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2977 } else { 2978 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2979 pcbddc->adaptive_constraints_idxs_ptr, 2980 pcbddc->adaptive_constraints_data_ptr, 2981 pcbddc->adaptive_constraints_idxs, 2982 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2983 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2984 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2985 } 2986 PetscFunctionReturn(0); 2987 } 2988 2989 #undef __FUNCT__ 2990 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2991 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2992 { 2993 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2994 PC_IS *pcis = (PC_IS*)pc->data; 2995 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2996 PetscInt ierr,i,vertex_size,N; 2997 PetscViewer viewer=pcbddc->dbg_viewer; 2998 2999 PetscFunctionBegin; 3000 /* Reset previously computed graph */ 3001 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3002 /* Init local Graph struct */ 3003 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3004 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 3005 3006 /* Check validity of the csr graph passed in by the user */ 3007 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3008 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3009 } 3010 3011 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3012 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3013 PetscInt *xadj,*adjncy; 3014 PetscInt nvtxs; 3015 PetscBool flg_row=PETSC_FALSE; 3016 3017 if (pcbddc->use_local_adj) { 3018 3019 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3020 if (flg_row) { 3021 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3022 pcbddc->computed_rowadj = PETSC_TRUE; 3023 } 3024 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3025 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3026 IS is_dummy; 3027 ISLocalToGlobalMapping l2gmap_dummy; 3028 PetscInt j,sum; 3029 PetscInt *cxadj,*cadjncy; 3030 const PetscInt *idxs; 3031 PCBDDCGraph graph; 3032 PetscBT is_on_boundary; 3033 3034 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3035 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3036 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3037 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3038 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3039 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3040 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3041 if (flg_row) { 3042 graph->xadj = xadj; 3043 graph->adjncy = adjncy; 3044 } 3045 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3046 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3047 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3048 3049 if (pcbddc->dbg_flag) { 3050 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3051 for (i=0;i<graph->ncc;i++) { 3052 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3053 } 3054 } 3055 3056 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3057 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3058 for (i=0;i<pcis->n_B;i++) { 3059 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3060 } 3061 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3062 3063 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3064 sum = 0; 3065 for (i=0;i<graph->ncc;i++) { 3066 PetscInt sizecc = 0; 3067 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3068 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3069 sizecc++; 3070 } 3071 } 3072 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3073 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3074 cxadj[graph->queue[j]] = sizecc; 3075 } 3076 } 3077 sum += sizecc*sizecc; 3078 } 3079 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3080 sum = 0; 3081 for (i=0;i<pcis->n;i++) { 3082 PetscInt temp = cxadj[i]; 3083 cxadj[i] = sum; 3084 sum += temp; 3085 } 3086 cxadj[pcis->n] = sum; 3087 for (i=0;i<graph->ncc;i++) { 3088 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3089 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3090 PetscInt k,sizecc = 0; 3091 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3092 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3093 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3094 sizecc++; 3095 } 3096 } 3097 } 3098 } 3099 } 3100 if (sum) { 3101 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3102 } else { 3103 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3104 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3105 } 3106 graph->xadj = 0; 3107 graph->adjncy = 0; 3108 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3109 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3110 } 3111 } 3112 if (pcbddc->dbg_flag) { 3113 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3114 } 3115 3116 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3117 vertex_size = 1; 3118 if (pcbddc->user_provided_isfordofs) { 3119 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3120 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3121 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3122 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3123 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3124 } 3125 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3126 pcbddc->n_ISForDofs = 0; 3127 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3128 } 3129 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3130 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3131 } else { 3132 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3133 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3134 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3135 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3136 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3137 } 3138 } 3139 } 3140 3141 /* Setup of Graph */ 3142 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3143 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3144 } 3145 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3146 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3147 } 3148 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3149 3150 /* Graph's connected components analysis */ 3151 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3152 3153 /* print some info to stdout */ 3154 if (pcbddc->dbg_flag) { 3155 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3156 } 3157 3158 /* mark topography has done */ 3159 pcbddc->recompute_topography = PETSC_FALSE; 3160 PetscFunctionReturn(0); 3161 } 3162 3163 #undef __FUNCT__ 3164 #define __FUNCT__ "PCBDDCSubsetNumbering" 3165 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3166 { 3167 Vec local_vec,global_vec; 3168 IS seqis,paris; 3169 VecScatter scatter_ctx; 3170 PetscScalar *array; 3171 PetscInt *temp_global_dofs; 3172 PetscScalar globalsum; 3173 PetscInt i,j,s; 3174 PetscInt nlocals,first_index,old_index,max_local,max_global; 3175 PetscMPIInt rank_prec_comm,size_prec_comm; 3176 PetscInt *dof_sizes,*dof_displs; 3177 PetscBool first_found; 3178 PetscErrorCode ierr; 3179 3180 PetscFunctionBegin; 3181 /* mpi buffers */ 3182 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3183 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3184 j = ( !rank_prec_comm ? size_prec_comm : 0); 3185 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3186 /* get maximum size of subset */ 3187 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3188 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3189 max_local = 0; 3190 for (i=0;i<n_local_dofs;i++) { 3191 if (max_local < temp_global_dofs[i] ) { 3192 max_local = temp_global_dofs[i]; 3193 } 3194 } 3195 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3196 max_global++; 3197 max_local = 0; 3198 for (i=0;i<n_local_dofs;i++) { 3199 if (max_local < local_dofs[i] ) { 3200 max_local = local_dofs[i]; 3201 } 3202 } 3203 max_local++; 3204 /* allocate workspace */ 3205 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3206 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3207 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3208 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3209 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3210 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3211 /* create scatter */ 3212 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3213 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3214 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3215 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3216 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3217 /* init array */ 3218 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3219 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3220 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3221 if (local_dofs_mult) { 3222 for (i=0;i<n_local_dofs;i++) { 3223 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3224 } 3225 } else { 3226 for (i=0;i<n_local_dofs;i++) { 3227 array[local_dofs[i]]=1.0; 3228 } 3229 } 3230 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3231 /* scatter into global vec and get total number of global dofs */ 3232 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3233 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3234 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3235 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3236 /* Fill global_vec with cumulative function for global numbering */ 3237 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3238 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3239 nlocals = 0; 3240 first_index = -1; 3241 first_found = PETSC_FALSE; 3242 for (i=0;i<s;i++) { 3243 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3244 first_found = PETSC_TRUE; 3245 first_index = i; 3246 } 3247 nlocals += (PetscInt)PetscRealPart(array[i]); 3248 } 3249 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3250 if (!rank_prec_comm) { 3251 dof_displs[0]=0; 3252 for (i=1;i<size_prec_comm;i++) { 3253 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3254 } 3255 } 3256 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3257 if (first_found) { 3258 array[first_index] += (PetscScalar)nlocals; 3259 old_index = first_index; 3260 for (i=first_index+1;i<s;i++) { 3261 if (PetscRealPart(array[i]) > 0.1) { 3262 array[i] += array[old_index]; 3263 old_index = i; 3264 } 3265 } 3266 } 3267 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3268 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3269 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3270 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3271 /* get global ordering of local dofs */ 3272 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3273 if (local_dofs_mult) { 3274 for (i=0;i<n_local_dofs;i++) { 3275 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3276 } 3277 } else { 3278 for (i=0;i<n_local_dofs;i++) { 3279 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3280 } 3281 } 3282 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3283 /* free workspace */ 3284 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3285 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3286 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3287 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3288 /* return pointer to global ordering of local dofs */ 3289 *global_numbering_subset = temp_global_dofs; 3290 PetscFunctionReturn(0); 3291 } 3292 3293 #undef __FUNCT__ 3294 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3295 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3296 { 3297 PetscInt i,j; 3298 PetscScalar *alphas; 3299 PetscErrorCode ierr; 3300 3301 PetscFunctionBegin; 3302 /* this implements stabilized Gram-Schmidt */ 3303 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3304 for (i=0;i<n;i++) { 3305 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3306 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3307 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3308 } 3309 ierr = PetscFree(alphas);CHKERRQ(ierr); 3310 PetscFunctionReturn(0); 3311 } 3312 3313 #undef __FUNCT__ 3314 #define __FUNCT__ "MatISGetSubassemblingPattern" 3315 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3316 { 3317 Mat subdomain_adj; 3318 IS new_ranks,ranks_send_to; 3319 MatPartitioning partitioner; 3320 Mat_IS *matis; 3321 PetscInt n_neighs,*neighs,*n_shared,**shared; 3322 PetscInt prank; 3323 PetscMPIInt size,rank,color; 3324 PetscInt *xadj,*adjncy,*oldranks; 3325 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3326 PetscInt i,local_size,threshold=0; 3327 PetscErrorCode ierr; 3328 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3329 PetscSubcomm subcomm; 3330 3331 PetscFunctionBegin; 3332 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3333 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3334 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3335 3336 /* Get info on mapping */ 3337 matis = (Mat_IS*)(mat->data); 3338 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3339 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3340 3341 /* build local CSR graph of subdomains' connectivity */ 3342 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3343 xadj[0] = 0; 3344 xadj[1] = PetscMax(n_neighs-1,0); 3345 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3346 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3347 3348 if (threshold) { 3349 PetscInt xadj_count = 0; 3350 for (i=1;i<n_neighs;i++) { 3351 if (n_shared[i] > threshold) { 3352 adjncy[xadj_count] = neighs[i]; 3353 adjncy_wgt[xadj_count] = n_shared[i]; 3354 xadj_count++; 3355 } 3356 } 3357 xadj[1] = xadj_count; 3358 } else { 3359 if (xadj[1]) { 3360 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3361 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3362 } 3363 } 3364 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3365 if (use_square) { 3366 for (i=0;i<xadj[1];i++) { 3367 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3368 } 3369 } 3370 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3371 3372 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3373 3374 /* 3375 Restrict work on active processes only. 3376 */ 3377 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3378 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3379 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3380 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3381 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3382 if (color) { 3383 ierr = PetscFree(xadj);CHKERRQ(ierr); 3384 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3385 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3386 } else { 3387 PetscInt coarsening_ratio; 3388 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3389 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3390 prank = rank; 3391 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3392 /* 3393 for (i=0;i<size;i++) { 3394 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3395 } 3396 */ 3397 for (i=0;i<xadj[1];i++) { 3398 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3399 } 3400 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3401 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3402 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3403 3404 /* Partition */ 3405 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3406 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3407 if (use_vwgt) { 3408 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3409 v_wgt[0] = local_size; 3410 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3411 } 3412 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3413 coarsening_ratio = size/n_subdomains; 3414 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3415 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3416 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3417 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3418 3419 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3420 if (contiguous) { 3421 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3422 } else { 3423 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3424 } 3425 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3426 /* clean up */ 3427 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3428 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3429 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3430 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3431 } 3432 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3433 3434 /* assemble parallel IS for sends */ 3435 i = 1; 3436 if (color) i=0; 3437 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3438 3439 /* get back IS */ 3440 *is_sends = ranks_send_to; 3441 PetscFunctionReturn(0); 3442 } 3443 3444 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3445 3446 #undef __FUNCT__ 3447 #define __FUNCT__ "MatISSubassemble" 3448 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3449 { 3450 Mat local_mat; 3451 Mat_IS *matis; 3452 IS is_sends_internal; 3453 PetscInt rows,cols,new_local_rows; 3454 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3455 PetscBool ismatis,isdense,newisdense,destroy_mat; 3456 ISLocalToGlobalMapping l2gmap; 3457 PetscInt* l2gmap_indices; 3458 const PetscInt* is_indices; 3459 MatType new_local_type; 3460 /* buffers */ 3461 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3462 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3463 PetscInt *recv_buffer_idxs_local; 3464 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3465 /* MPI */ 3466 MPI_Comm comm,comm_n; 3467 PetscSubcomm subcomm; 3468 PetscMPIInt n_sends,n_recvs,commsize; 3469 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3470 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3471 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3472 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3473 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3474 PetscErrorCode ierr; 3475 3476 PetscFunctionBegin; 3477 /* TODO: add missing checks */ 3478 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3479 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3480 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3481 PetscValidLogicalCollectiveInt(mat,nis,7); 3482 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3483 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3484 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3485 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3486 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3487 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3488 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3489 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3490 PetscInt mrows,mcols,mnrows,mncols; 3491 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3492 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3493 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3494 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3495 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3496 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3497 } 3498 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3499 PetscValidLogicalCollectiveInt(mat,bs,0); 3500 /* prepare IS for sending if not provided */ 3501 if (!is_sends) { 3502 PetscBool pcontig = PETSC_TRUE; 3503 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3504 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3505 } else { 3506 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3507 is_sends_internal = is_sends; 3508 } 3509 3510 /* get pointer of MATIS data */ 3511 matis = (Mat_IS*)mat->data; 3512 3513 /* get comm */ 3514 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3515 3516 /* compute number of sends */ 3517 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3518 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3519 3520 /* compute number of receives */ 3521 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3522 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3523 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3524 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3525 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3526 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3527 ierr = PetscFree(iflags);CHKERRQ(ierr); 3528 3529 /* restrict comm if requested */ 3530 subcomm = 0; 3531 destroy_mat = PETSC_FALSE; 3532 if (restrict_comm) { 3533 PetscMPIInt color,subcommsize; 3534 3535 color = 0; 3536 if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */ 3537 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3538 subcommsize = commsize - subcommsize; 3539 /* check if reuse has been requested */ 3540 if (reuse == MAT_REUSE_MATRIX) { 3541 if (*mat_n) { 3542 PetscMPIInt subcommsize2; 3543 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3544 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3545 comm_n = PetscObjectComm((PetscObject)*mat_n); 3546 } else { 3547 comm_n = PETSC_COMM_SELF; 3548 } 3549 } else { /* MAT_INITIAL_MATRIX */ 3550 PetscMPIInt rank; 3551 3552 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3553 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3554 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3555 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3556 comm_n = PetscSubcommChild(subcomm); 3557 } 3558 /* flag to destroy *mat_n if not significative */ 3559 if (color) destroy_mat = PETSC_TRUE; 3560 } else { 3561 comm_n = comm; 3562 } 3563 3564 /* prepare send/receive buffers */ 3565 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3566 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3567 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3568 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3569 if (nis) { 3570 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3571 } 3572 3573 /* Get data from local matrices */ 3574 if (!isdense) { 3575 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3576 /* TODO: See below some guidelines on how to prepare the local buffers */ 3577 /* 3578 send_buffer_vals should contain the raw values of the local matrix 3579 send_buffer_idxs should contain: 3580 - MatType_PRIVATE type 3581 - PetscInt size_of_l2gmap 3582 - PetscInt global_row_indices[size_of_l2gmap] 3583 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3584 */ 3585 } else { 3586 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3587 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3588 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3589 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3590 send_buffer_idxs[1] = i; 3591 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3592 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3593 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3594 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3595 for (i=0;i<n_sends;i++) { 3596 ilengths_vals[is_indices[i]] = len*len; 3597 ilengths_idxs[is_indices[i]] = len+2; 3598 } 3599 } 3600 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3601 /* additional is (if any) */ 3602 if (nis) { 3603 PetscMPIInt psum; 3604 PetscInt j; 3605 for (j=0,psum=0;j<nis;j++) { 3606 PetscInt plen; 3607 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3608 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3609 psum += len+1; /* indices + lenght */ 3610 } 3611 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3612 for (j=0,psum=0;j<nis;j++) { 3613 PetscInt plen; 3614 const PetscInt *is_array_idxs; 3615 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3616 send_buffer_idxs_is[psum] = plen; 3617 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3618 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3619 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3620 psum += plen+1; /* indices + lenght */ 3621 } 3622 for (i=0;i<n_sends;i++) { 3623 ilengths_idxs_is[is_indices[i]] = psum; 3624 } 3625 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3626 } 3627 3628 buf_size_idxs = 0; 3629 buf_size_vals = 0; 3630 buf_size_idxs_is = 0; 3631 for (i=0;i<n_recvs;i++) { 3632 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3633 buf_size_vals += (PetscInt)olengths_vals[i]; 3634 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3635 } 3636 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3637 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3638 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3639 3640 /* get new tags for clean communications */ 3641 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3642 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3643 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3644 3645 /* allocate for requests */ 3646 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3647 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3648 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3649 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3650 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3651 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3652 3653 /* communications */ 3654 ptr_idxs = recv_buffer_idxs; 3655 ptr_vals = recv_buffer_vals; 3656 ptr_idxs_is = recv_buffer_idxs_is; 3657 for (i=0;i<n_recvs;i++) { 3658 source_dest = onodes[i]; 3659 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3660 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3661 ptr_idxs += olengths_idxs[i]; 3662 ptr_vals += olengths_vals[i]; 3663 if (nis) { 3664 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3665 ptr_idxs_is += olengths_idxs_is[i]; 3666 } 3667 } 3668 for (i=0;i<n_sends;i++) { 3669 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3670 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3671 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3672 if (nis) { 3673 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3674 } 3675 } 3676 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3677 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3678 3679 /* assemble new l2g map */ 3680 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3681 ptr_idxs = recv_buffer_idxs; 3682 new_local_rows = 0; 3683 for (i=0;i<n_recvs;i++) { 3684 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3685 ptr_idxs += olengths_idxs[i]; 3686 } 3687 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3688 ptr_idxs = recv_buffer_idxs; 3689 new_local_rows = 0; 3690 for (i=0;i<n_recvs;i++) { 3691 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3692 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3693 ptr_idxs += olengths_idxs[i]; 3694 } 3695 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3696 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3697 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3698 3699 /* infer new local matrix type from received local matrices type */ 3700 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3701 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3702 if (n_recvs) { 3703 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3704 ptr_idxs = recv_buffer_idxs; 3705 for (i=0;i<n_recvs;i++) { 3706 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3707 new_local_type_private = MATAIJ_PRIVATE; 3708 break; 3709 } 3710 ptr_idxs += olengths_idxs[i]; 3711 } 3712 switch (new_local_type_private) { 3713 case MATDENSE_PRIVATE: 3714 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3715 new_local_type = MATSEQAIJ; 3716 bs = 1; 3717 } else { /* if I receive only 1 dense matrix */ 3718 new_local_type = MATSEQDENSE; 3719 bs = 1; 3720 } 3721 break; 3722 case MATAIJ_PRIVATE: 3723 new_local_type = MATSEQAIJ; 3724 bs = 1; 3725 break; 3726 case MATBAIJ_PRIVATE: 3727 new_local_type = MATSEQBAIJ; 3728 break; 3729 case MATSBAIJ_PRIVATE: 3730 new_local_type = MATSEQSBAIJ; 3731 break; 3732 default: 3733 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3734 break; 3735 } 3736 } else { /* by default, new_local_type is seqdense */ 3737 new_local_type = MATSEQDENSE; 3738 bs = 1; 3739 } 3740 3741 /* create MATIS object if needed */ 3742 if (reuse == MAT_INITIAL_MATRIX) { 3743 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3744 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3745 } else { 3746 /* it also destroys the local matrices */ 3747 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3748 } 3749 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3750 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3751 3752 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3753 3754 /* Global to local map of received indices */ 3755 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3756 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3757 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3758 3759 /* restore attributes -> type of incoming data and its size */ 3760 buf_size_idxs = 0; 3761 for (i=0;i<n_recvs;i++) { 3762 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3763 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3764 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3765 } 3766 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3767 3768 /* set preallocation */ 3769 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3770 if (!newisdense) { 3771 PetscInt *new_local_nnz=0; 3772 3773 ptr_vals = recv_buffer_vals; 3774 ptr_idxs = recv_buffer_idxs_local; 3775 if (n_recvs) { 3776 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3777 } 3778 for (i=0;i<n_recvs;i++) { 3779 PetscInt j; 3780 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3781 for (j=0;j<*(ptr_idxs+1);j++) { 3782 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3783 } 3784 } else { 3785 /* TODO */ 3786 } 3787 ptr_idxs += olengths_idxs[i]; 3788 } 3789 if (new_local_nnz) { 3790 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3791 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3792 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3793 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3794 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3795 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3796 } else { 3797 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3798 } 3799 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3800 } else { 3801 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3802 } 3803 3804 /* set values */ 3805 ptr_vals = recv_buffer_vals; 3806 ptr_idxs = recv_buffer_idxs_local; 3807 for (i=0;i<n_recvs;i++) { 3808 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3809 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3810 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3811 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3812 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3813 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3814 } else { 3815 /* TODO */ 3816 } 3817 ptr_idxs += olengths_idxs[i]; 3818 ptr_vals += olengths_vals[i]; 3819 } 3820 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3821 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3822 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3823 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3824 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3825 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3826 3827 #if 0 3828 if (!restrict_comm) { /* check */ 3829 Vec lvec,rvec; 3830 PetscReal infty_error; 3831 3832 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3833 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3834 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3835 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3836 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3837 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3838 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3839 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3840 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3841 } 3842 #endif 3843 3844 /* assemble new additional is (if any) */ 3845 if (nis) { 3846 PetscInt **temp_idxs,*count_is,j,psum; 3847 3848 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3849 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3850 ptr_idxs = recv_buffer_idxs_is; 3851 psum = 0; 3852 for (i=0;i<n_recvs;i++) { 3853 for (j=0;j<nis;j++) { 3854 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3855 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3856 psum += plen; 3857 ptr_idxs += plen+1; /* shift pointer to received data */ 3858 } 3859 } 3860 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3861 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3862 for (i=1;i<nis;i++) { 3863 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3864 } 3865 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3866 ptr_idxs = recv_buffer_idxs_is; 3867 for (i=0;i<n_recvs;i++) { 3868 for (j=0;j<nis;j++) { 3869 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3870 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3871 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3872 ptr_idxs += plen+1; /* shift pointer to received data */ 3873 } 3874 } 3875 for (i=0;i<nis;i++) { 3876 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3877 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3878 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3879 } 3880 ierr = PetscFree(count_is);CHKERRQ(ierr); 3881 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3882 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3883 } 3884 /* free workspace */ 3885 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3886 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3887 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3888 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3889 if (isdense) { 3890 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3891 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3892 } else { 3893 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3894 } 3895 if (nis) { 3896 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3897 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3898 } 3899 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3900 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3901 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3902 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3903 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3904 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3905 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3906 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3907 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3908 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3909 ierr = PetscFree(onodes);CHKERRQ(ierr); 3910 if (nis) { 3911 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3912 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3913 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3914 } 3915 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3916 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3917 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3918 for (i=0;i<nis;i++) { 3919 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3920 } 3921 } 3922 PetscFunctionReturn(0); 3923 } 3924 3925 /* temporary hack into ksp private data structure */ 3926 #include <petsc/private/kspimpl.h> 3927 3928 #undef __FUNCT__ 3929 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3930 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3931 { 3932 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3933 PC_IS *pcis = (PC_IS*)pc->data; 3934 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3935 MatNullSpace CoarseNullSpace=NULL; 3936 ISLocalToGlobalMapping coarse_islg; 3937 IS coarse_is,*isarray; 3938 PetscInt i,im_active=-1,active_procs=-1; 3939 PetscInt nis,nisdofs,nisneu; 3940 PC pc_temp; 3941 PCType coarse_pc_type; 3942 KSPType coarse_ksp_type; 3943 PetscBool multilevel_requested,multilevel_allowed; 3944 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3945 Mat t_coarse_mat_is; 3946 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3947 PetscMPIInt all_procs; 3948 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3949 PetscBool compute_vecs = PETSC_FALSE; 3950 PetscScalar *array; 3951 PetscErrorCode ierr; 3952 3953 PetscFunctionBegin; 3954 /* Assign global numbering to coarse dofs */ 3955 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3956 PetscInt ocoarse_size; 3957 compute_vecs = PETSC_TRUE; 3958 ocoarse_size = pcbddc->coarse_size; 3959 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3960 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3961 /* see if we can avoid some work */ 3962 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3963 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3964 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3965 coarse_reuse = PETSC_FALSE; 3966 } else { /* we can safely reuse already computed coarse matrix */ 3967 coarse_reuse = PETSC_TRUE; 3968 } 3969 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3970 coarse_reuse = PETSC_FALSE; 3971 } 3972 /* reset any subassembling information */ 3973 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3974 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3975 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3976 coarse_reuse = PETSC_TRUE; 3977 } 3978 3979 /* count "active" (i.e. with positive local size) and "void" processes */ 3980 im_active = !!(pcis->n); 3981 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3982 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3983 void_procs = all_procs-active_procs; 3984 csin_type_simple = PETSC_TRUE; 3985 redist = PETSC_FALSE; 3986 if (pcbddc->current_level && void_procs) { 3987 csin_ml = PETSC_TRUE; 3988 ncoarse_ml = void_procs; 3989 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3990 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3991 csin_ds = PETSC_TRUE; 3992 ncoarse_ds = pcbddc->redistribute_coarse; 3993 redist = PETSC_TRUE; 3994 } else { 3995 csin_ds = PETSC_TRUE; 3996 ncoarse_ds = active_procs; 3997 redist = PETSC_TRUE; 3998 } 3999 } else { 4000 csin_ml = PETSC_FALSE; 4001 ncoarse_ml = all_procs; 4002 if (void_procs) { 4003 csin_ds = PETSC_TRUE; 4004 ncoarse_ds = void_procs; 4005 csin_type_simple = PETSC_FALSE; 4006 } else { 4007 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4008 csin_ds = PETSC_TRUE; 4009 ncoarse_ds = pcbddc->redistribute_coarse; 4010 redist = PETSC_TRUE; 4011 } else { 4012 csin_ds = PETSC_FALSE; 4013 ncoarse_ds = all_procs; 4014 } 4015 } 4016 } 4017 4018 /* 4019 test if we can go multilevel: three conditions must be satisfied: 4020 - we have not exceeded the number of levels requested 4021 - we can actually subassemble the active processes 4022 - we can find a suitable number of MPI processes where we can place the subassembled problem 4023 */ 4024 multilevel_allowed = PETSC_FALSE; 4025 multilevel_requested = PETSC_FALSE; 4026 if (pcbddc->current_level < pcbddc->max_levels) { 4027 multilevel_requested = PETSC_TRUE; 4028 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4029 multilevel_allowed = PETSC_FALSE; 4030 } else { 4031 multilevel_allowed = PETSC_TRUE; 4032 } 4033 } 4034 /* determine number of process partecipating to coarse solver */ 4035 if (multilevel_allowed) { 4036 ncoarse = ncoarse_ml; 4037 csin = csin_ml; 4038 redist = PETSC_FALSE; 4039 } else { 4040 ncoarse = ncoarse_ds; 4041 csin = csin_ds; 4042 } 4043 4044 /* creates temporary l2gmap and IS for coarse indexes */ 4045 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4046 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4047 4048 /* creates temporary MATIS object for coarse matrix */ 4049 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4050 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4051 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4052 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4053 #if 0 4054 { 4055 PetscViewer viewer; 4056 char filename[256]; 4057 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4058 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4059 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4060 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4061 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4062 } 4063 #endif 4064 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4065 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4066 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4067 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4068 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4069 4070 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4071 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4072 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4073 const PetscInt *idxs; 4074 ISLocalToGlobalMapping tmap; 4075 4076 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4077 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4078 /* allocate space for temporary storage */ 4079 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4080 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4081 /* allocate for IS array */ 4082 nisdofs = pcbddc->n_ISForDofsLocal; 4083 nisneu = !!pcbddc->NeumannBoundariesLocal; 4084 nis = nisdofs + nisneu; 4085 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4086 /* dofs splitting */ 4087 for (i=0;i<nisdofs;i++) { 4088 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4089 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4090 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4091 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4092 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4093 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4094 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4095 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4096 } 4097 /* neumann boundaries */ 4098 if (pcbddc->NeumannBoundariesLocal) { 4099 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4100 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4101 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4102 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4103 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4104 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4105 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4106 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4107 } 4108 /* free memory */ 4109 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4110 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4111 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4112 } else { 4113 nis = 0; 4114 nisdofs = 0; 4115 nisneu = 0; 4116 isarray = NULL; 4117 } 4118 /* destroy no longer needed map */ 4119 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4120 4121 /* restrict on coarse candidates (if needed) */ 4122 coarse_mat_is = NULL; 4123 if (csin) { 4124 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4125 if (redist) { 4126 PetscMPIInt rank; 4127 PetscInt spc,n_spc_p1,dest[1],destsize; 4128 4129 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4130 spc = active_procs/ncoarse; 4131 n_spc_p1 = active_procs%ncoarse; 4132 if (im_active) { 4133 destsize = 1; 4134 if (rank > n_spc_p1*(spc+1)-1) { 4135 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4136 } else { 4137 dest[0] = rank/(spc+1); 4138 } 4139 } else { 4140 destsize = 0; 4141 } 4142 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4143 } else if (csin_type_simple) { 4144 PetscMPIInt rank; 4145 PetscInt issize,isidx; 4146 4147 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4148 if (im_active) { 4149 issize = 1; 4150 isidx = (PetscInt)rank; 4151 } else { 4152 issize = 0; 4153 isidx = -1; 4154 } 4155 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4156 } else { /* get a suitable subassembling pattern from MATIS code */ 4157 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4158 } 4159 4160 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4161 if (!redist || ncoarse <= void_procs) { 4162 PetscInt ncoarse_cand,tissize,*nisindices; 4163 PetscInt *coarse_candidates; 4164 const PetscInt* tisindices; 4165 4166 /* get coarse candidates' ranks in pc communicator */ 4167 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4168 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4169 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4170 if (!coarse_candidates[i]) { 4171 coarse_candidates[ncoarse_cand++]=i; 4172 } 4173 } 4174 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4175 4176 4177 if (pcbddc->dbg_flag) { 4178 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4179 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4180 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4181 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4182 for (i=0;i<ncoarse_cand;i++) { 4183 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4184 } 4185 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4186 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4187 } 4188 /* shift the pattern on coarse candidates */ 4189 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4190 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4191 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4192 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4193 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4194 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4195 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4196 } 4197 if (pcbddc->dbg_flag) { 4198 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4199 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4200 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4201 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4202 } 4203 } 4204 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4205 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4206 } else { 4207 if (pcbddc->dbg_flag) { 4208 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4209 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4210 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4211 } 4212 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4213 coarse_mat_is = t_coarse_mat_is; 4214 } 4215 4216 /* create local to global scatters for coarse problem */ 4217 if (compute_vecs) { 4218 PetscInt lrows; 4219 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4220 if (coarse_mat_is) { 4221 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4222 } else { 4223 lrows = 0; 4224 } 4225 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4226 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4227 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4228 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4229 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4230 } 4231 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4232 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4233 4234 /* set defaults for coarse KSP and PC */ 4235 if (multilevel_allowed) { 4236 coarse_ksp_type = KSPRICHARDSON; 4237 coarse_pc_type = PCBDDC; 4238 } else { 4239 coarse_ksp_type = KSPPREONLY; 4240 coarse_pc_type = PCREDUNDANT; 4241 } 4242 4243 /* print some info if requested */ 4244 if (pcbddc->dbg_flag) { 4245 if (!multilevel_allowed) { 4246 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4247 if (multilevel_requested) { 4248 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4249 } else if (pcbddc->max_levels) { 4250 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4251 } 4252 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4253 } 4254 } 4255 4256 /* create the coarse KSP object only once with defaults */ 4257 if (coarse_mat_is) { 4258 MatReuse coarse_mat_reuse; 4259 PetscViewer dbg_viewer = NULL; 4260 if (pcbddc->dbg_flag) { 4261 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4262 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4263 } 4264 if (!pcbddc->coarse_ksp) { 4265 char prefix[256],str_level[16]; 4266 size_t len; 4267 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4268 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4269 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4270 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4271 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4272 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4273 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4274 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4275 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4276 /* prefix */ 4277 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4278 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4279 if (!pcbddc->current_level) { 4280 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4281 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4282 } else { 4283 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4284 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4285 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4286 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4287 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4288 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4289 } 4290 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4291 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4292 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4293 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4294 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4295 /* allow user customization */ 4296 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4297 } 4298 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4299 if (nisdofs) { 4300 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4301 for (i=0;i<nisdofs;i++) { 4302 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4303 } 4304 } 4305 if (nisneu) { 4306 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4307 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4308 } 4309 4310 /* get some info after set from options */ 4311 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4312 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4313 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4314 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4315 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4316 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4317 isbddc = PETSC_FALSE; 4318 } 4319 if (isredundant) { 4320 KSP inner_ksp; 4321 PC inner_pc; 4322 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4323 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4324 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4325 } 4326 4327 /* assemble coarse matrix */ 4328 if (coarse_reuse) { 4329 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4330 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4331 coarse_mat_reuse = MAT_REUSE_MATRIX; 4332 } else { 4333 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4334 } 4335 if (isbddc || isnn) { 4336 if (pcbddc->coarsening_ratio > 1) { 4337 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4338 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4339 if (pcbddc->dbg_flag) { 4340 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4341 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4342 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4343 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4344 } 4345 } 4346 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4347 } else { 4348 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4349 coarse_mat = coarse_mat_is; 4350 } 4351 } else { 4352 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4353 } 4354 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4355 4356 /* propagate symmetry info to coarse matrix */ 4357 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4358 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4359 4360 /* set operators */ 4361 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4362 if (pcbddc->dbg_flag) { 4363 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4364 } 4365 } else { /* processes non partecipating to coarse solver (if any) */ 4366 coarse_mat = 0; 4367 } 4368 ierr = PetscFree(isarray);CHKERRQ(ierr); 4369 #if 0 4370 { 4371 PetscViewer viewer; 4372 char filename[256]; 4373 sprintf(filename,"coarse_mat.m"); 4374 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4375 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4376 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4377 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4378 } 4379 #endif 4380 4381 /* Compute coarse null space (special handling by BDDC only) */ 4382 if (pcbddc->NullSpace) { 4383 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4384 } 4385 4386 if (pcbddc->coarse_ksp) { 4387 Vec crhs,csol; 4388 PetscBool ispreonly; 4389 if (CoarseNullSpace) { 4390 if (isbddc) { 4391 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4392 } else { 4393 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4394 } 4395 } 4396 /* setup coarse ksp */ 4397 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4398 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4399 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4400 /* hack */ 4401 if (!csol) { 4402 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4403 } 4404 if (!crhs) { 4405 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4406 } 4407 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4408 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4409 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4410 KSP check_ksp; 4411 KSPType check_ksp_type; 4412 PC check_pc; 4413 Vec check_vec,coarse_vec; 4414 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4415 PetscInt its; 4416 PetscBool compute_eigs; 4417 PetscReal *eigs_r,*eigs_c; 4418 PetscInt neigs; 4419 const char *prefix; 4420 4421 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4422 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4423 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4424 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4425 if (ispreonly) { 4426 check_ksp_type = KSPPREONLY; 4427 compute_eigs = PETSC_FALSE; 4428 } else { 4429 check_ksp_type = KSPGMRES; 4430 compute_eigs = PETSC_TRUE; 4431 } 4432 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4433 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4434 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4435 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4436 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4437 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4438 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4439 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4440 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4441 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4442 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4443 /* create random vec */ 4444 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4445 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4446 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4447 if (CoarseNullSpace) { 4448 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4449 } 4450 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4451 /* solve coarse problem */ 4452 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4453 if (CoarseNullSpace) { 4454 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4455 } 4456 /* set eigenvalue estimation if preonly has not been requested */ 4457 if (compute_eigs) { 4458 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4459 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4460 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4461 lambda_max = eigs_r[neigs-1]; 4462 lambda_min = eigs_r[0]; 4463 if (pcbddc->use_coarse_estimates) { 4464 if (lambda_max>lambda_min) { 4465 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4466 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4467 } 4468 } 4469 } 4470 4471 /* check coarse problem residual error */ 4472 if (pcbddc->dbg_flag) { 4473 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4474 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4475 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4476 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4477 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4478 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4479 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4480 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4481 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4482 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4483 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4484 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4485 if (compute_eigs) { 4486 PetscReal lambda_max_s,lambda_min_s; 4487 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4488 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4489 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4490 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4491 for (i=0;i<neigs;i++) { 4492 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4493 } 4494 } 4495 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4496 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4497 } 4498 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4499 if (compute_eigs) { 4500 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4501 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4502 } 4503 } 4504 } 4505 /* print additional info */ 4506 if (pcbddc->dbg_flag) { 4507 /* waits until all processes reaches this point */ 4508 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4509 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4510 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4511 } 4512 4513 /* free memory */ 4514 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4515 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4516 PetscFunctionReturn(0); 4517 } 4518 4519 #undef __FUNCT__ 4520 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4521 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4522 { 4523 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4524 PC_IS* pcis = (PC_IS*)pc->data; 4525 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4526 PetscInt i,local_size,coarse_size=0; 4527 PetscInt *local_primal_indices=NULL; 4528 PetscInt *t_local_primal_indices=NULL; 4529 PetscErrorCode ierr; 4530 4531 PetscFunctionBegin; 4532 /* Compute global number of coarse dofs */ 4533 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4534 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4535 } 4536 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr); 4537 ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr); 4538 local_size = 0; 4539 for (i=0;i<pcbddc->local_primal_size_cc;i++) { 4540 PetscInt j; 4541 for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j; 4542 } 4543 ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr); 4544 if (local_size != pcbddc->local_primal_size) { 4545 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4546 } 4547 4548 /* check numbering */ 4549 if (pcbddc->dbg_flag) { 4550 PetscScalar coarsesum,*array; 4551 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4552 4553 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4554 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4555 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4556 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4557 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4558 for (i=0;i<pcbddc->local_primal_size;i++) { 4559 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4560 } 4561 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4562 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4563 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4564 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4565 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4566 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4567 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4568 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4569 for (i=0;i<pcis->n;i++) { 4570 if (array[i] == 1.0) { 4571 set_error = PETSC_TRUE; 4572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4573 } 4574 } 4575 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4576 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4577 for (i=0;i<pcis->n;i++) { 4578 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4579 } 4580 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4581 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4582 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4583 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4584 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4585 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4586 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4587 PetscInt *gidxs; 4588 4589 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4590 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4591 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4592 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4593 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4594 for (i=0;i<pcbddc->local_primal_size;i++) { 4595 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4596 } 4597 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4598 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4599 } 4600 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4601 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4602 } 4603 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4604 /* get back data */ 4605 *coarse_size_n = coarse_size; 4606 *local_primal_indices_n = local_primal_indices; 4607 PetscFunctionReturn(0); 4608 } 4609 4610 #undef __FUNCT__ 4611 #define __FUNCT__ "PCBDDCGlobalToLocal" 4612 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4613 { 4614 IS localis_t; 4615 PetscInt i,lsize,*idxs,n; 4616 PetscScalar *vals; 4617 PetscErrorCode ierr; 4618 4619 PetscFunctionBegin; 4620 /* get indices in local ordering exploiting local to global map */ 4621 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4622 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4623 for (i=0;i<lsize;i++) vals[i] = 1.0; 4624 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4625 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4626 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4627 if (idxs) { /* multilevel guard */ 4628 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4629 } 4630 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4631 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4632 ierr = PetscFree(vals);CHKERRQ(ierr); 4633 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4634 /* now compute set in local ordering */ 4635 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4636 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4637 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4638 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4639 for (i=0,lsize=0;i<n;i++) { 4640 if (PetscRealPart(vals[i]) > 0.5) { 4641 lsize++; 4642 } 4643 } 4644 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4645 for (i=0,lsize=0;i<n;i++) { 4646 if (PetscRealPart(vals[i]) > 0.5) { 4647 idxs[lsize++] = i; 4648 } 4649 } 4650 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4651 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4652 *localis = localis_t; 4653 PetscFunctionReturn(0); 4654 } 4655 4656 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4657 #undef __FUNCT__ 4658 #define __FUNCT__ "PCBDDCMatMult_Private" 4659 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4660 { 4661 PCBDDCChange_ctx change_ctx; 4662 PetscErrorCode ierr; 4663 4664 PetscFunctionBegin; 4665 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4666 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4667 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4668 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4669 PetscFunctionReturn(0); 4670 } 4671 4672 #undef __FUNCT__ 4673 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4674 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4675 { 4676 PCBDDCChange_ctx change_ctx; 4677 PetscErrorCode ierr; 4678 4679 PetscFunctionBegin; 4680 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4681 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4682 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4683 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4684 PetscFunctionReturn(0); 4685 } 4686 4687 #undef __FUNCT__ 4688 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4689 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4690 { 4691 PC_IS *pcis=(PC_IS*)pc->data; 4692 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4693 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4694 Mat S_j; 4695 PetscInt *used_xadj,*used_adjncy; 4696 PetscBool free_used_adj; 4697 PetscErrorCode ierr; 4698 4699 PetscFunctionBegin; 4700 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4701 free_used_adj = PETSC_FALSE; 4702 if (pcbddc->sub_schurs_layers == -1) { 4703 used_xadj = NULL; 4704 used_adjncy = NULL; 4705 } else { 4706 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4707 used_xadj = pcbddc->mat_graph->xadj; 4708 used_adjncy = pcbddc->mat_graph->adjncy; 4709 } else if (pcbddc->computed_rowadj) { 4710 used_xadj = pcbddc->mat_graph->xadj; 4711 used_adjncy = pcbddc->mat_graph->adjncy; 4712 } else { 4713 PetscBool flg_row=PETSC_FALSE; 4714 const PetscInt *xadj,*adjncy; 4715 PetscInt nvtxs; 4716 4717 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4718 if (flg_row) { 4719 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4720 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4721 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4722 free_used_adj = PETSC_TRUE; 4723 } else { 4724 pcbddc->sub_schurs_layers = -1; 4725 used_xadj = NULL; 4726 used_adjncy = NULL; 4727 } 4728 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4729 } 4730 } 4731 4732 /* setup sub_schurs data */ 4733 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4734 if (!sub_schurs->use_mumps) { 4735 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4736 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4737 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4738 } else { 4739 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4740 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4741 } 4742 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4743 4744 /* free adjacency */ 4745 if (free_used_adj) { 4746 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4747 } 4748 PetscFunctionReturn(0); 4749 } 4750 4751 #undef __FUNCT__ 4752 #define __FUNCT__ "PCBDDCInitSubSchurs" 4753 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4754 { 4755 PC_IS *pcis=(PC_IS*)pc->data; 4756 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4757 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4758 PCBDDCGraph graph; 4759 PetscErrorCode ierr; 4760 4761 PetscFunctionBegin; 4762 /* attach interface graph for determining subsets */ 4763 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4764 IS verticesIS; 4765 4766 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4767 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4768 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4769 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4770 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4771 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4772 /* 4773 if (pcbddc->dbg_flag) { 4774 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4775 } 4776 */ 4777 } else { 4778 graph = pcbddc->mat_graph; 4779 } 4780 4781 /* sub_schurs init */ 4782 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4783 4784 /* free graph struct */ 4785 if (pcbddc->sub_schurs_rebuild) { 4786 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4787 } 4788 PetscFunctionReturn(0); 4789 } 4790