1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 60 nmax = PetscMax(nmin,nmax); 61 allocated_S_St = PETSC_FALSE; 62 if (nmin) { 63 allocated_S_St = PETSC_TRUE; 64 } 65 66 /* allocate lapack workspace */ 67 cum = cum2 = 0; 68 maxneigs = 0; 69 for (i=0;i<sub_schurs->n_subs;i++) { 70 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 71 PetscInt n,subset_size; 72 73 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 74 n = PetscMin(subset_size,nmax); 75 cum += subset_size; 76 cum2 += subset_size*n; 77 maxneigs = PetscMax(maxneigs,n); 78 } 79 } 80 if (mss) { 81 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 82 PetscBLASInt B_itype = 1; 83 PetscBLASInt B_N = mss; 84 PetscReal zero = 0.0; 85 PetscReal eps = 0.0; /* dlamch? */ 86 87 B_lwork = -1; 88 S = NULL; 89 St = NULL; 90 eigs = NULL; 91 eigv = NULL; 92 B_iwork = NULL; 93 B_ifail = NULL; 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 PetscInt j,subset_size; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 158 const PetscInt *idxs; 159 PetscReal infty = PETSC_MAX_REAL; 160 PetscInt eigs_start = 0; 161 PetscBLASInt B_N; 162 163 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 164 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 165 if (sub_schurs->is_hermitian) { 166 PetscInt j; 167 for (j=0;j<subset_size;j++) { 168 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 /* Threshold: this is an heuristic for edges */ 183 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 184 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 185 186 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 187 PetscBLASInt B_itype = 1; 188 PetscBLASInt B_IL, B_IU; 189 PetscReal eps = -1.0; /* dlamch? */ 190 PetscInt nmin_s; 191 192 /* ask for eigenvalues larger than thresh */ 193 if (pcbddc->dbg_flag) { 194 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 195 } 196 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 197 #if defined(PETSC_USE_COMPLEX) 198 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 199 #else 200 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 201 #endif 202 ierr = PetscFPTrapPop();CHKERRQ(ierr); 203 if (B_ierr) { 204 if (B_ierr < 0 ) { 205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 206 } else if (B_ierr <= B_N) { 207 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 208 } else { 209 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 210 } 211 } 212 213 if (B_neigs > nmax) { 214 if (pcbddc->dbg_flag) { 215 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 216 } 217 eigs_start = B_neigs -nmax; 218 B_neigs = nmax; 219 } 220 221 nmin_s = PetscMin(nmin,B_N); 222 if (B_neigs < nmin_s) { 223 PetscBLASInt B_neigs2; 224 225 B_IU = B_N - B_neigs; 226 B_IL = B_N - nmin_s + 1; 227 if (pcbddc->dbg_flag) { 228 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 229 } 230 if (sub_schurs->is_hermitian) { 231 PetscInt j; 232 for (j=0;j<subset_size;j++) { 233 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 234 } 235 for (j=0;j<subset_size;j++) { 236 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 } else { 239 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 240 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 241 } 242 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 243 #if defined(PETSC_USE_COMPLEX) 244 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 245 #else 246 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 247 #endif 248 ierr = PetscFPTrapPop();CHKERRQ(ierr); 249 B_neigs += B_neigs2; 250 } 251 if (B_ierr) { 252 if (B_ierr < 0 ) { 253 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 254 } else if (B_ierr <= B_N) { 255 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 256 } else { 257 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 258 } 259 } 260 if (pcbddc->dbg_flag) { 261 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 262 for (j=0;j<B_neigs;j++) { 263 if (eigs[j] == 0.0) { 264 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 265 } else { 266 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 267 } 268 } 269 } 270 } else { 271 /* TODO */ 272 } 273 274 maxneigs = PetscMax(B_neigs,maxneigs); 275 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 276 if (B_neigs) { 277 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 278 279 if (pcbddc->dbg_flag > 1) { 280 PetscInt ii; 281 for (ii=0;ii<B_neigs;ii++) { 282 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 283 for (j=0;j<B_N;j++) { 284 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 } 286 } 287 } 288 #if 0 289 for (j=0;j<B_neigs;j++) { 290 PetscBLASInt Blas_N,Blas_one = 1.0; 291 PetscScalar norm; 292 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 293 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 294 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 295 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 296 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 297 } else { 298 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 299 } 300 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 301 } 302 #endif 303 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 304 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 305 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 306 cum++; 307 } 308 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 309 } 310 /* shift for next computation */ 311 cumarray += subset_size*subset_size; 312 } 313 if (pcbddc->dbg_flag) { 314 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 315 } 316 317 if (mss) { 318 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 319 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 320 /* destroy matrices (junk) */ 321 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 322 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 323 } 324 if (allocated_S_St) { 325 ierr = PetscFree2(S,St);CHKERRQ(ierr); 326 } 327 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 328 #if defined(PETSC_USE_COMPLEX) 329 ierr = PetscFree(rwork);CHKERRQ(ierr); 330 #endif 331 if (pcbddc->dbg_flag) { 332 PetscInt maxneigs_r; 333 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 334 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 335 } 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "PCBDDCSetUpSolvers" 341 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 342 { 343 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 344 PetscScalar *coarse_submat_vals; 345 PetscErrorCode ierr; 346 347 PetscFunctionBegin; 348 /* Setup local scatters R_to_B and (optionally) R_to_D */ 349 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 350 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 351 352 /* Setup local neumann solver ksp_R */ 353 /* PCBDDCSetUpLocalScatters should be called first! */ 354 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 355 356 /* Change global null space passed in by the user if change of basis has been requested */ 357 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 358 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 359 } 360 361 /* 362 Setup local correction and local part of coarse basis. 363 Gives back the dense local part of the coarse matrix in column major ordering 364 */ 365 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 366 367 /* Compute total number of coarse nodes and setup coarse solver */ 368 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 369 370 /* free */ 371 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "PCBDDCResetCustomization" 377 PetscErrorCode PCBDDCResetCustomization(PC pc) 378 { 379 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 384 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 385 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 386 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 387 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 388 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 389 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 390 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 392 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 393 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 #undef __FUNCT__ 398 #define __FUNCT__ "PCBDDCResetTopography" 399 PetscErrorCode PCBDDCResetTopography(PC pc) 400 { 401 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 402 PetscErrorCode ierr; 403 404 PetscFunctionBegin; 405 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 406 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 407 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 408 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 409 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetSolvers" 415 PetscErrorCode PCBDDCResetSolvers(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscScalar *array; 419 PetscErrorCode ierr; 420 421 PetscFunctionBegin; 422 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 423 if (pcbddc->coarse_phi_B) { 424 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 425 ierr = PetscFree(array);CHKERRQ(ierr); 426 } 427 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 428 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 429 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 430 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 431 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 432 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 433 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 434 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 436 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 437 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 438 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 439 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 440 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 441 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 442 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 443 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 445 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 446 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 447 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 448 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 449 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 450 PetscFunctionReturn(0); 451 } 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 455 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 456 { 457 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 458 PC_IS *pcis = (PC_IS*)pc->data; 459 VecType impVecType; 460 PetscInt n_constraints,n_R,old_size; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 if (!pcbddc->ConstraintMatrix) { 465 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 466 } 467 /* get sizes */ 468 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 469 n_R = pcis->n-pcbddc->n_vertices; 470 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 471 /* local work vectors (try to avoid unneeded work)*/ 472 /* R nodes */ 473 old_size = -1; 474 if (pcbddc->vec1_R) { 475 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 476 } 477 if (n_R != old_size) { 478 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 479 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 480 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 481 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 482 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 483 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 484 } 485 /* local primal dofs */ 486 old_size = -1; 487 if (pcbddc->vec1_P) { 488 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 489 } 490 if (pcbddc->local_primal_size != old_size) { 491 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 492 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 493 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 494 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 495 } 496 /* local explicit constraints */ 497 old_size = -1; 498 if (pcbddc->vec1_C) { 499 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 500 } 501 if (n_constraints && n_constraints != old_size) { 502 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 503 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 504 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 505 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 506 } 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "PCBDDCSetUpCorrection" 512 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 513 { 514 PetscErrorCode ierr; 515 /* pointers to pcis and pcbddc */ 516 PC_IS* pcis = (PC_IS*)pc->data; 517 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 518 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 519 /* submatrices of local problem */ 520 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 521 /* submatrices of local coarse problem */ 522 Mat S_VV,S_CV,S_VC,S_CC; 523 /* working matrices */ 524 Mat C_CR; 525 /* additional working stuff */ 526 PC pc_R; 527 Mat F; 528 PetscBool isLU,isCHOL,isILU; 529 530 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 531 PetscScalar *work; 532 PetscInt *idx_V_B; 533 PetscInt n,n_vertices,n_constraints; 534 PetscInt i,n_R,n_D,n_B; 535 PetscBool unsymmetric_check; 536 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 537 MatType impMatType; 538 /* some shortcuts to scalars */ 539 PetscScalar one=1.0,m_one=-1.0; 540 541 PetscFunctionBegin; 542 n_vertices = pcbddc->n_vertices; 543 n_constraints = pcbddc->local_primal_size-n_vertices; 544 /* Set Non-overlapping dimensions */ 545 n_B = pcis->n_B; 546 n_D = pcis->n - n_B; 547 n_R = pcis->n - n_vertices; 548 549 /* Set types for local objects needed by BDDC precondtioner */ 550 impMatType = MATSEQDENSE; 551 552 /* vertices in boundary numbering */ 553 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 554 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 555 if (i != n_vertices) { 556 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 557 } 558 559 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 560 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 564 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 565 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 566 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 568 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 569 570 unsymmetric_check = PETSC_FALSE; 571 /* allocate workspace */ 572 n = 0; 573 if (n_constraints) { 574 n += n_R*n_constraints; 575 } 576 if (n_vertices) { 577 n = PetscMax(2*n_R*n_vertices,n); 578 n = PetscMax((n_R+n_B)*n_vertices,n); 579 } 580 if (!pcbddc->issym) { 581 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 582 unsymmetric_check = PETSC_TRUE; 583 } 584 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 585 586 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 587 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 588 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 589 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 590 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 591 if (isLU || isILU || isCHOL) { 592 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 593 } else if (sub_schurs->reuse_mumps) { 594 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 595 MatFactorType type; 596 597 F = reuse_mumps->F; 598 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 599 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 600 } else { 601 F = NULL; 602 } 603 604 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 605 if (n_constraints) { 606 Mat M1,M2,M3; 607 Mat auxmat; 608 IS is_aux; 609 PetscScalar *array,*array2; 610 611 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 612 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 613 614 /* Extract constraints on R nodes: C_{CR} */ 615 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 616 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 617 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 618 619 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 620 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 621 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 622 for (i=0;i<n_constraints;i++) { 623 const PetscScalar *row_cmat_values; 624 const PetscInt *row_cmat_indices; 625 PetscInt size_of_constraint,j; 626 627 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 628 for (j=0;j<size_of_constraint;j++) { 629 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 630 } 631 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 632 } 633 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 634 if (F) { 635 Mat B; 636 637 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 638 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 639 ierr = MatDestroy(&B);CHKERRQ(ierr); 640 } else { 641 PetscScalar *marr; 642 643 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 644 for (i=0;i<n_constraints;i++) { 645 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 646 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 647 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 648 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 649 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 650 } 651 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 652 } 653 if (!pcbddc->switch_static) { 654 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 655 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 656 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 657 for (i=0;i<n_constraints;i++) { 658 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 659 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 660 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 661 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 662 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 663 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 664 } 665 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 666 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 667 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 668 } else { 669 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 670 pcbddc->local_auxmat2 = local_auxmat2_R; 671 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 672 } 673 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 674 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 675 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 676 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 677 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 678 if (isCHOL) { 679 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 680 } else { 681 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 682 } 683 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 684 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 685 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 686 ierr = MatDestroy(&M2);CHKERRQ(ierr); 687 ierr = MatDestroy(&M3);CHKERRQ(ierr); 688 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 689 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 690 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 691 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 692 ierr = MatDestroy(&M1);CHKERRQ(ierr); 693 } 694 /* Get submatrices from subdomain matrix */ 695 if (n_vertices) { 696 Mat newmat; 697 IS is_aux; 698 PetscInt ibs,mbs; 699 PetscBool issbaij; 700 701 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 702 IS tis; 703 704 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 705 ierr = ISSort(tis);CHKERRQ(ierr); 706 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 707 ierr = ISDestroy(&tis);CHKERRQ(ierr); 708 } else { 709 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 710 } 711 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 712 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 713 if (ibs != mbs) { /* need to convert to SEQAIJ */ 714 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 715 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 716 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 717 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 718 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 719 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 720 } else { 721 /* this is safe */ 722 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 723 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 724 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 725 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 726 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 727 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 728 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 729 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 730 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 731 } else { 732 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 733 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 734 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 735 } 736 /* need to convert to SeqAIJ for MatMatMult with dense matrix */ 737 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 738 } 739 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 740 } 741 742 /* Matrix of coarse basis functions (local) */ 743 if (pcbddc->coarse_phi_B) { 744 PetscInt on_B,on_primal,on_D=n_D; 745 if (pcbddc->coarse_phi_D) { 746 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 747 } 748 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 749 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 750 PetscScalar *marray; 751 752 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 753 ierr = PetscFree(marray);CHKERRQ(ierr); 754 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 755 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 756 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 757 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 758 } 759 } 760 761 if (!pcbddc->coarse_phi_B) { 762 PetscScalar *marray; 763 764 n = n_B*pcbddc->local_primal_size; 765 if (pcbddc->switch_static || pcbddc->dbg_flag) { 766 n += n_D*pcbddc->local_primal_size; 767 } 768 if (!pcbddc->issym) { 769 n *= 2; 770 } 771 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 772 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 773 n = n_B*pcbddc->local_primal_size; 774 if (pcbddc->switch_static || pcbddc->dbg_flag) { 775 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 776 n += n_D*pcbddc->local_primal_size; 777 } 778 if (!pcbddc->issym) { 779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 780 if (pcbddc->switch_static || pcbddc->dbg_flag) { 781 n = n_B*pcbddc->local_primal_size; 782 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 783 } 784 } else { 785 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 786 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 787 if (pcbddc->switch_static || pcbddc->dbg_flag) { 788 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 789 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 790 } 791 } 792 } 793 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 794 /* vertices */ 795 if (n_vertices) { 796 797 if (n_R) { 798 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 799 PetscBLASInt B_N,B_one = 1; 800 PetscScalar *x,*y; 801 802 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 803 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 804 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 805 if (F) { /* TODO could be optimized for symmetric problems */ 806 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 807 } else { 808 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 809 for (i=0;i<n_vertices;i++) { 810 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 811 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 812 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 813 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 814 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 815 } 816 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 817 } 818 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 819 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 820 if (n_constraints) { 821 Mat B; 822 823 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 824 for (i=0;i<n_vertices;i++) { 825 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 826 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 827 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 829 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 830 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 831 } 832 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 833 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 834 ierr = MatDestroy(&B);CHKERRQ(ierr); 835 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 836 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 837 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 838 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 839 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 840 ierr = MatDestroy(&B);CHKERRQ(ierr); 841 } 842 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 843 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 844 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 845 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 846 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 847 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 848 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 849 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 850 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 851 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 852 } else { 853 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 854 } 855 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 856 /* coarse basis functions */ 857 for (i=0;i<n_vertices;i++) { 858 PetscScalar *y; 859 860 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 861 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 862 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 863 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 864 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 865 y[n_B*i+idx_V_B[i]] = 1.0; 866 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 867 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 868 869 if (pcbddc->switch_static || pcbddc->dbg_flag) { 870 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 871 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 872 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 875 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 876 } 877 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 878 } 879 } 880 881 if (n_constraints) { 882 Mat B; 883 884 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 885 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 886 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 887 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 888 if (n_vertices) { 889 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 890 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 891 } else { 892 Mat S_VCt; 893 894 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 895 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 896 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 897 } 898 } 899 ierr = MatDestroy(&B);CHKERRQ(ierr); 900 /* coarse basis functions */ 901 for (i=0;i<n_constraints;i++) { 902 PetscScalar *y; 903 904 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 905 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 906 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 907 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 910 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 911 if (pcbddc->switch_static || pcbddc->dbg_flag) { 912 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 913 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 914 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 917 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 918 } 919 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 920 } 921 } 922 if (n_constraints) { 923 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 924 } 925 926 /* compute other basis functions for non-symmetric problems */ 927 if (!pcbddc->issym) { 928 929 if (n_constraints) { 930 Mat S_CCT,B_C; 931 932 /* this is a lazy thing */ 933 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 934 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 935 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 936 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 937 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 938 if (n_vertices) { 939 Mat B_V,S_VCT; 940 941 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 942 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 943 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 944 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 945 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 946 } 947 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 948 } else { /* if there are no constraints, reset work */ 949 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 950 } 951 if (n_vertices && n_R) { 952 Mat A_VRT; 953 PetscScalar *marray; 954 PetscBLASInt B_N,B_one = 1; 955 956 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 957 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 958 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 959 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 960 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 961 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 962 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 963 } 964 965 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 966 for (i=0;i<pcbddc->local_primal_size;i++) { 967 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 968 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 969 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 970 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 971 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 972 } 973 } else { 974 for (i=0;i<pcbddc->local_primal_size;i++) { 975 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 976 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 977 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 978 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 979 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 980 } 981 } 982 /* coarse basis functions */ 983 for (i=0;i<pcbddc->local_primal_size;i++) { 984 PetscScalar *y; 985 986 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 987 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 988 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 989 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 if (i<n_vertices) { 992 y[n_B*i+idx_V_B[i]] = 1.0; 993 } 994 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 995 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 996 997 if (pcbddc->switch_static || pcbddc->dbg_flag) { 998 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 999 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1003 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1004 } 1005 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1006 } 1007 } 1008 /* free memory */ 1009 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1010 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1011 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1012 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1013 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1014 ierr = PetscFree(work);CHKERRQ(ierr); 1015 if (n_vertices) { 1016 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1017 } 1018 if (n_constraints) { 1019 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1020 } 1021 /* Checking coarse_sub_mat and coarse basis functios */ 1022 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1023 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1024 if (pcbddc->dbg_flag) { 1025 Mat coarse_sub_mat; 1026 Mat AUXMAT,TM1,TM2,TM3,TM4; 1027 Mat coarse_phi_D,coarse_phi_B; 1028 Mat coarse_psi_D,coarse_psi_B; 1029 Mat A_II,A_BB,A_IB,A_BI; 1030 Mat C_B,CPHI; 1031 IS is_dummy; 1032 Vec mones; 1033 MatType checkmattype=MATSEQAIJ; 1034 PetscReal real_value; 1035 1036 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1037 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1038 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1039 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1040 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1041 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1042 if (unsymmetric_check) { 1043 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1044 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1045 } 1046 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1047 1048 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1049 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 1050 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1051 if (unsymmetric_check) { 1052 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1053 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1054 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1059 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1060 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1061 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1062 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1063 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1064 } else { 1065 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1066 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1067 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1069 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1070 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1071 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1072 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1073 } 1074 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1075 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1077 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1078 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1079 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1081 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1082 1083 /* check constraints */ 1084 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1085 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1086 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1087 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1088 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1089 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1090 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1092 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1093 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1094 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1095 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1096 1097 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1098 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1099 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1100 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1101 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1102 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1103 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1104 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1105 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1106 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1107 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1108 if (unsymmetric_check) { 1109 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1110 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1111 } 1112 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1113 } 1114 /* get back data */ 1115 *coarse_submat_vals_n = coarse_submat_vals; 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1121 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1122 { 1123 Mat *work_mat; 1124 IS isrow_s,iscol_s; 1125 PetscBool rsorted,csorted; 1126 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1131 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1132 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1133 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1134 1135 if (!rsorted) { 1136 const PetscInt *idxs; 1137 PetscInt *idxs_sorted,i; 1138 1139 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1141 for (i=0;i<rsize;i++) { 1142 idxs_perm_r[i] = i; 1143 } 1144 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1145 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1146 for (i=0;i<rsize;i++) { 1147 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1148 } 1149 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1150 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1151 } else { 1152 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1153 isrow_s = isrow; 1154 } 1155 1156 if (!csorted) { 1157 if (isrow == iscol) { 1158 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1159 iscol_s = isrow_s; 1160 } else { 1161 const PetscInt *idxs; 1162 PetscInt *idxs_sorted,i; 1163 1164 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1165 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1166 for (i=0;i<csize;i++) { 1167 idxs_perm_c[i] = i; 1168 } 1169 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1170 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1171 for (i=0;i<csize;i++) { 1172 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1173 } 1174 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1175 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1176 } 1177 } else { 1178 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1179 iscol_s = iscol; 1180 } 1181 1182 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1183 1184 if (!rsorted || !csorted) { 1185 Mat new_mat; 1186 IS is_perm_r,is_perm_c; 1187 1188 if (!rsorted) { 1189 PetscInt *idxs_r,i; 1190 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1191 for (i=0;i<rsize;i++) { 1192 idxs_r[idxs_perm_r[i]] = i; 1193 } 1194 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1195 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1196 } else { 1197 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1198 } 1199 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1200 1201 if (!csorted) { 1202 if (isrow_s == iscol_s) { 1203 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1204 is_perm_c = is_perm_r; 1205 } else { 1206 PetscInt *idxs_c,i; 1207 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1208 for (i=0;i<csize;i++) { 1209 idxs_c[idxs_perm_c[i]] = i; 1210 } 1211 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1212 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1213 } 1214 } else { 1215 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1216 } 1217 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1218 1219 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1220 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1221 work_mat[0] = new_mat; 1222 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1223 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1224 } 1225 1226 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1227 *B = work_mat[0]; 1228 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1229 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1230 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1236 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1237 { 1238 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1239 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1240 Mat new_mat; 1241 IS is_local,is_global; 1242 PetscInt local_size; 1243 PetscBool isseqaij; 1244 PetscErrorCode ierr; 1245 1246 PetscFunctionBegin; 1247 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1248 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1249 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1250 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1251 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1252 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1253 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1254 1255 /* check */ 1256 if (pcbddc->dbg_flag) { 1257 Vec x,x_change; 1258 PetscReal error; 1259 1260 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1261 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1262 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1263 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1266 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1267 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1268 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1269 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1270 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1271 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1272 ierr = VecDestroy(&x);CHKERRQ(ierr); 1273 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1274 } 1275 1276 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1277 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1278 if (isseqaij) { 1279 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1280 } else { 1281 Mat work_mat; 1282 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1283 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1284 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1285 } 1286 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1287 #if !defined(PETSC_USE_COMPLEX) 1288 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1289 #endif 1290 /* 1291 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1292 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1293 */ 1294 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1300 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1301 { 1302 PC_IS* pcis = (PC_IS*)(pc->data); 1303 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1304 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1305 IS is_B_reuse_mumps=NULL; 1306 PetscInt *idx_R_local; 1307 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1308 PetscInt vbs,bs; 1309 PetscBT bitmask=NULL; 1310 PetscErrorCode ierr; 1311 1312 PetscFunctionBegin; 1313 /* 1314 No need to setup local scatters if 1315 - primal space is unchanged 1316 AND 1317 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1318 AND 1319 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1320 */ 1321 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1322 PetscFunctionReturn(0); 1323 } 1324 /* destroy old objects */ 1325 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1326 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1327 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1328 /* Set Non-overlapping dimensions */ 1329 n_B = pcis->n_B; 1330 n_D = pcis->n - n_B; 1331 n_vertices = pcbddc->n_vertices; 1332 1333 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1334 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1335 1336 /* create auxiliary bitmask */ 1337 if (!sub_schurs->reuse_mumps) { 1338 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1339 for (i=0;i<n_vertices;i++) { 1340 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1341 } 1342 1343 for (i=0, n_R=0; i<pcis->n; i++) { 1344 if (!PetscBTLookup(bitmask,i)) { 1345 idx_R_local[n_R++] = i; 1346 } 1347 } 1348 } else { /* A different ordering and auxiliary scatters are needed we are reusing MUMPS Schur solver */ 1349 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1350 const PetscInt *idxs; 1351 PetscInt schur_size; 1352 IS list[2],tisB_N,tisR; 1353 1354 if (sub_schurs->is_dir) { 1355 list[0] = sub_schurs->is_Ej_all; 1356 list[1] = sub_schurs->is_dir; 1357 ierr = ISGlobalToLocalMappingApplyIS(pcis->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_dir,&list[1]);CHKERRQ(ierr); 1358 ierr = ISConcatenate(PETSC_COMM_SELF,2,list,&is_B_reuse_mumps);CHKERRQ(ierr); 1359 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1360 } else { 1361 ierr = ISDuplicate(sub_schurs->is_Ej_all,&is_B_reuse_mumps);CHKERRQ(ierr); 1362 } 1363 ierr = ISGetLocalSize(is_B_reuse_mumps,&schur_size);CHKERRQ(ierr); 1364 if (reuse_mumps->S_inv) { 1365 ierr = VecScatterDestroy(&reuse_mumps->correction_scatter_B);CHKERRQ(ierr); 1366 ierr = VecScatterCreate(pcis->vec1_B,is_B_reuse_mumps,reuse_mumps->solB,NULL,&reuse_mumps->correction_scatter_B);CHKERRQ(ierr); 1367 } 1368 ierr = ISLocalToGlobalMappingApplyIS(sub_schurs->BtoNmap,is_B_reuse_mumps,&tisB_N);CHKERRQ(ierr); 1369 list[0] = pcis->is_I_local; 1370 list[1] = tisB_N; 1371 ierr = ISConcatenate(PETSC_COMM_SELF,2,list,&tisR);CHKERRQ(ierr); 1372 ierr = ISDestroy(&tisB_N);CHKERRQ(ierr); 1373 ierr = ISGetLocalSize(tisR,&n_R);CHKERRQ(ierr); 1374 ierr = ISGetIndices(tisR,&idxs);CHKERRQ(ierr); 1375 ierr = PetscMemcpy(idx_R_local,idxs,n_R*sizeof(PetscInt));CHKERRQ(ierr); 1376 ierr = ISRestoreIndices(tisR,&idxs);CHKERRQ(ierr); 1377 ierr = ISDestroy(&tisR);CHKERRQ(ierr); 1378 } 1379 1380 /* Block code */ 1381 vbs = 1; 1382 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1383 if (bs>1 && !(n_vertices%bs)) { 1384 PetscBool is_blocked = PETSC_TRUE; 1385 PetscInt *vary; 1386 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1387 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1388 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1389 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1390 for (i=0; i<n_vertices/bs; i++) { 1391 if (vary[i]!=0 && vary[i]!=bs) { 1392 is_blocked = PETSC_FALSE; 1393 break; 1394 } 1395 } 1396 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1397 vbs = bs; 1398 for (i=0;i<n_R/vbs;i++) { 1399 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1400 } 1401 } 1402 ierr = PetscFree(vary);CHKERRQ(ierr); 1403 } 1404 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1405 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1406 1407 /* print some info if requested */ 1408 if (pcbddc->dbg_flag) { 1409 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1410 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1411 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1413 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1414 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1415 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1416 } 1417 1418 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1419 if (!sub_schurs->reuse_mumps) { 1420 IS is_aux1,is_aux2; 1421 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1422 1423 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1424 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1425 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1426 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1427 for (i=0; i<n_D; i++) { 1428 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1429 } 1430 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1431 for (i=0, j=0; i<n_R; i++) { 1432 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1433 aux_array1[j++] = i; 1434 } 1435 } 1436 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1437 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1438 for (i=0, j=0; i<n_B; i++) { 1439 if (!PetscBTLookup(bitmask,is_indices[i])) { 1440 aux_array2[j++] = i; 1441 } 1442 } 1443 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1444 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1445 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1446 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1447 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1448 1449 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1450 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1451 for (i=0, j=0; i<n_R; i++) { 1452 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1453 aux_array1[j++] = i; 1454 } 1455 } 1456 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1457 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1458 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1459 } 1460 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1461 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1462 } else { 1463 IS tis; 1464 PetscInt schur_size; 1465 1466 ierr = ISGetLocalSize(is_B_reuse_mumps,&schur_size);CHKERRQ(ierr); 1467 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1468 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,is_B_reuse_mumps,&pcbddc->R_to_B);CHKERRQ(ierr); 1469 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1470 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1471 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1472 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1473 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1474 } 1475 } 1476 ierr = ISDestroy(&is_B_reuse_mumps);CHKERRQ(ierr); 1477 PetscFunctionReturn(0); 1478 } 1479 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1483 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1484 { 1485 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1486 PC_IS *pcis = (PC_IS*)pc->data; 1487 PC pc_temp; 1488 Mat A_RR; 1489 MatReuse reuse; 1490 PetscScalar m_one = -1.0; 1491 PetscReal value; 1492 PetscInt n_D,n_R,ibs,mbs; 1493 PetscBool use_exact,use_exact_reduced,issbaij; 1494 PetscErrorCode ierr; 1495 /* prefixes stuff */ 1496 char dir_prefix[256],neu_prefix[256],str_level[16]; 1497 size_t len; 1498 1499 PetscFunctionBegin; 1500 1501 /* compute prefixes */ 1502 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1503 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1504 if (!pcbddc->current_level) { 1505 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1506 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1507 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1508 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1509 } else { 1510 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1511 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1512 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1513 len -= 15; /* remove "pc_bddc_coarse_" */ 1514 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1515 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1516 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1517 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1518 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1519 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1520 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1521 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1522 } 1523 1524 /* DIRICHLET PROBLEM */ 1525 if (dirichlet) { 1526 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1527 if (pcbddc->issym) { 1528 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1529 } 1530 /* Matrix for Dirichlet problem is pcis->A_II */ 1531 n_D = pcis->n - pcis->n_B; 1532 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1533 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1534 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1535 /* default */ 1536 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1537 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1538 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1539 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1540 if (issbaij) { 1541 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1542 } else { 1543 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1544 } 1545 /* Allow user's customization */ 1546 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1547 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1548 } 1549 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1550 if (sub_schurs->reuse_mumps) { 1551 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1552 1553 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1554 } 1555 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1556 if (!n_D) { 1557 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1558 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1559 } 1560 /* Set Up KSP for Dirichlet problem of BDDC */ 1561 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1562 /* set ksp_D into pcis data */ 1563 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1564 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1565 pcis->ksp_D = pcbddc->ksp_D; 1566 } 1567 1568 /* NEUMANN PROBLEM */ 1569 A_RR = 0; 1570 if (neumann) { 1571 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1572 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1573 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1574 if (pcbddc->ksp_R) { /* already created ksp */ 1575 PetscInt nn_R; 1576 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1577 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1578 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1579 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1580 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1581 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1582 reuse = MAT_INITIAL_MATRIX; 1583 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1584 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1585 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1586 reuse = MAT_INITIAL_MATRIX; 1587 } else { /* safe to reuse the matrix */ 1588 reuse = MAT_REUSE_MATRIX; 1589 } 1590 } 1591 /* last check */ 1592 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1593 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1594 reuse = MAT_INITIAL_MATRIX; 1595 } 1596 } else { /* first time, so we need to create the matrix */ 1597 reuse = MAT_INITIAL_MATRIX; 1598 } 1599 /* extract A_RR */ 1600 if (!sub_schurs->reuse_mumps) { 1601 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1602 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1603 if (ibs != mbs) { 1604 Mat newmat; 1605 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1606 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1607 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1608 } else { 1609 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1610 } 1611 if (pcbddc->issym) { 1612 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1613 } 1614 } else { 1615 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1616 1617 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1618 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1619 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1620 } 1621 if (!pcbddc->ksp_R) { /* create object if not present */ 1622 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1623 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1624 /* default */ 1625 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1626 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1627 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1628 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1629 if (issbaij) { 1630 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1631 } else { 1632 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1633 } 1634 /* Allow user's customization */ 1635 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1636 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1637 } 1638 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1639 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1640 if (!n_R) { 1641 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1642 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1643 } 1644 /* Reuse MUMPS solver if it is present */ 1645 if (sub_schurs->reuse_mumps) { 1646 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1647 1648 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1649 } 1650 /* Set Up KSP for Neumann problem of BDDC */ 1651 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1652 } 1653 /* free Neumann problem's matrix */ 1654 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1655 1656 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1657 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1658 if (pcbddc->dbg_flag) { 1659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1660 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1661 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1662 } 1663 if (dirichlet) { /* Dirichlet */ 1664 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1665 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1666 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1667 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1668 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1669 /* need to be adapted? */ 1670 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1671 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1672 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1673 /* print info */ 1674 if (pcbddc->dbg_flag) { 1675 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1676 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1677 } 1678 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1679 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1680 } 1681 } 1682 if (neumann) { /* Neumann */ 1683 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1684 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1685 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1686 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1687 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1688 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1689 /* need to be adapted? */ 1690 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1691 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1692 /* print info */ 1693 if (pcbddc->dbg_flag) { 1694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1695 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1696 } 1697 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1698 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1699 } 1700 } 1701 } 1702 PetscFunctionReturn(0); 1703 } 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1707 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1708 { 1709 PetscErrorCode ierr; 1710 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1711 1712 PetscFunctionBegin; 1713 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1714 if (!pcbddc->switch_static) { 1715 if (applytranspose && pcbddc->local_auxmat1) { 1716 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1717 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1718 } 1719 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1720 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1721 } else { 1722 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1723 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1724 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1725 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1726 if (applytranspose && pcbddc->local_auxmat1) { 1727 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1728 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1729 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1730 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1731 } 1732 } 1733 if (applytranspose) { 1734 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1735 } else { 1736 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1737 } 1738 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1739 if (!pcbddc->switch_static) { 1740 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1741 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1742 if (!applytranspose && pcbddc->local_auxmat1) { 1743 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1744 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1745 } 1746 } else { 1747 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1748 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1749 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1750 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1751 if (!applytranspose && pcbddc->local_auxmat1) { 1752 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1753 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1754 } 1755 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1756 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1757 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1758 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1759 } 1760 PetscFunctionReturn(0); 1761 } 1762 1763 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1766 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1767 { 1768 PetscErrorCode ierr; 1769 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1770 PC_IS* pcis = (PC_IS*) (pc->data); 1771 const PetscScalar zero = 0.0; 1772 1773 PetscFunctionBegin; 1774 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1775 if (applytranspose) { 1776 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1777 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1778 } else { 1779 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1780 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1781 } 1782 /* start communications from local primal nodes to rhs of coarse solver */ 1783 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1784 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1785 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1786 1787 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1788 /* TODO remove null space when doing multilevel */ 1789 if (pcbddc->coarse_ksp) { 1790 Vec rhs,sol; 1791 1792 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1793 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1794 if (applytranspose) { 1795 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1796 } else { 1797 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1798 } 1799 } 1800 1801 /* Local solution on R nodes */ 1802 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1803 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1804 } 1805 1806 /* communications from coarse sol to local primal nodes */ 1807 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1808 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1809 1810 /* Sum contributions from two levels */ 1811 if (applytranspose) { 1812 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1813 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1814 } else { 1815 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1816 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1817 } 1818 PetscFunctionReturn(0); 1819 } 1820 1821 #undef __FUNCT__ 1822 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1823 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1824 { 1825 PetscErrorCode ierr; 1826 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1827 PetscScalar *array; 1828 Vec from,to; 1829 1830 PetscFunctionBegin; 1831 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1832 from = pcbddc->coarse_vec; 1833 to = pcbddc->vec1_P; 1834 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1835 Vec tvec; 1836 1837 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1838 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1839 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1840 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1841 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1842 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1843 } 1844 } else { /* from local to global -> put data in coarse right hand side */ 1845 from = pcbddc->vec1_P; 1846 to = pcbddc->coarse_vec; 1847 } 1848 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1849 PetscFunctionReturn(0); 1850 } 1851 1852 #undef __FUNCT__ 1853 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1854 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1855 { 1856 PetscErrorCode ierr; 1857 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1858 PetscScalar *array; 1859 Vec from,to; 1860 1861 PetscFunctionBegin; 1862 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1863 from = pcbddc->coarse_vec; 1864 to = pcbddc->vec1_P; 1865 } else { /* from local to global -> put data in coarse right hand side */ 1866 from = pcbddc->vec1_P; 1867 to = pcbddc->coarse_vec; 1868 } 1869 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1870 if (smode == SCATTER_FORWARD) { 1871 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1872 Vec tvec; 1873 1874 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1875 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1876 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1877 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1878 } 1879 } else { 1880 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1881 ierr = VecResetArray(from);CHKERRQ(ierr); 1882 } 1883 } 1884 PetscFunctionReturn(0); 1885 } 1886 1887 /* uncomment for testing purposes */ 1888 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1891 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1892 { 1893 PetscErrorCode ierr; 1894 PC_IS* pcis = (PC_IS*)(pc->data); 1895 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1896 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1897 /* one and zero */ 1898 PetscScalar one=1.0,zero=0.0; 1899 /* space to store constraints and their local indices */ 1900 PetscScalar *constraints_data; 1901 PetscInt *constraints_idxs,*constraints_idxs_B; 1902 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1903 PetscInt *constraints_n; 1904 /* iterators */ 1905 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1906 /* BLAS integers */ 1907 PetscBLASInt lwork,lierr; 1908 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1909 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1910 /* reuse */ 1911 PetscInt olocal_primal_size,olocal_primal_size_cc; 1912 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1913 /* change of basis */ 1914 PetscBool qr_needed; 1915 PetscBT change_basis,qr_needed_idx; 1916 /* auxiliary stuff */ 1917 PetscInt *nnz,*is_indices; 1918 PetscInt ncc; 1919 /* some quantities */ 1920 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1921 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1922 1923 PetscFunctionBegin; 1924 /* Destroy Mat objects computed previously */ 1925 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1926 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1927 /* save info on constraints from previous setup (if any) */ 1928 olocal_primal_size = pcbddc->local_primal_size; 1929 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1930 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1931 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1932 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1933 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1934 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1935 1936 /* print some info */ 1937 if (pcbddc->dbg_flag) { 1938 IS vertices; 1939 PetscInt nv,nedges,nfaces; 1940 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1941 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1942 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1943 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1944 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1945 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1946 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1947 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1948 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1949 } 1950 1951 if (!pcbddc->adaptive_selection) { 1952 IS ISForVertices,*ISForFaces,*ISForEdges; 1953 MatNullSpace nearnullsp; 1954 const Vec *nearnullvecs; 1955 Vec *localnearnullsp; 1956 PetscScalar *array; 1957 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1958 PetscBool nnsp_has_cnst; 1959 /* LAPACK working arrays for SVD or POD */ 1960 PetscBool skip_lapack,boolforchange; 1961 PetscScalar *work; 1962 PetscReal *singular_vals; 1963 #if defined(PETSC_USE_COMPLEX) 1964 PetscReal *rwork; 1965 #endif 1966 #if defined(PETSC_MISSING_LAPACK_GESVD) 1967 PetscScalar *temp_basis,*correlation_mat; 1968 #else 1969 PetscBLASInt dummy_int=1; 1970 PetscScalar dummy_scalar=1.; 1971 #endif 1972 1973 /* Get index sets for faces, edges and vertices from graph */ 1974 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1975 /* free unneeded index sets */ 1976 if (!pcbddc->use_vertices) { 1977 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1978 } 1979 if (!pcbddc->use_edges) { 1980 for (i=0;i<n_ISForEdges;i++) { 1981 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1982 } 1983 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1984 n_ISForEdges = 0; 1985 } 1986 if (!pcbddc->use_faces) { 1987 for (i=0;i<n_ISForFaces;i++) { 1988 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1989 } 1990 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1991 n_ISForFaces = 0; 1992 } 1993 1994 #if defined(PETSC_USE_DEBUG) 1995 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 1996 Also use_change_of_basis should be consistent among processors */ 1997 if (pcbddc->NullSpace) { 1998 PetscBool tbool[2],gbool[2]; 1999 2000 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2001 pcbddc->use_change_of_basis = PETSC_TRUE; 2002 if (!ISForEdges) { 2003 pcbddc->use_change_on_faces = PETSC_TRUE; 2004 } 2005 } 2006 tbool[0] = pcbddc->use_change_of_basis; 2007 tbool[1] = pcbddc->use_change_on_faces; 2008 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2009 pcbddc->use_change_of_basis = gbool[0]; 2010 pcbddc->use_change_on_faces = gbool[1]; 2011 } 2012 #endif 2013 2014 /* check if near null space is attached to global mat */ 2015 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2016 if (nearnullsp) { 2017 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2018 /* remove any stored info */ 2019 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2020 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2021 /* store information for BDDC solver reuse */ 2022 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2023 pcbddc->onearnullspace = nearnullsp; 2024 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2025 for (i=0;i<nnsp_size;i++) { 2026 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2027 } 2028 } else { /* if near null space is not provided BDDC uses constants by default */ 2029 nnsp_size = 0; 2030 nnsp_has_cnst = PETSC_TRUE; 2031 } 2032 /* get max number of constraints on a single cc */ 2033 max_constraints = nnsp_size; 2034 if (nnsp_has_cnst) max_constraints++; 2035 2036 /* 2037 Evaluate maximum storage size needed by the procedure 2038 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2039 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2040 There can be multiple constraints per connected component 2041 */ 2042 n_vertices = 0; 2043 if (ISForVertices) { 2044 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2045 } 2046 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2047 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2048 2049 total_counts = n_ISForFaces+n_ISForEdges; 2050 total_counts *= max_constraints; 2051 total_counts += n_vertices; 2052 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2053 2054 total_counts = 0; 2055 max_size_of_constraint = 0; 2056 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2057 IS used_is; 2058 if (i<n_ISForEdges) { 2059 used_is = ISForEdges[i]; 2060 } else { 2061 used_is = ISForFaces[i-n_ISForEdges]; 2062 } 2063 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2064 total_counts += j; 2065 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2066 } 2067 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2068 2069 /* get local part of global near null space vectors */ 2070 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2071 for (k=0;k<nnsp_size;k++) { 2072 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2073 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2074 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2075 } 2076 2077 /* whether or not to skip lapack calls */ 2078 skip_lapack = PETSC_TRUE; 2079 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2080 2081 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2082 if (!skip_lapack) { 2083 PetscScalar temp_work; 2084 2085 #if defined(PETSC_MISSING_LAPACK_GESVD) 2086 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2087 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2088 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2089 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2090 #if defined(PETSC_USE_COMPLEX) 2091 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2092 #endif 2093 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2094 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2095 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2096 lwork = -1; 2097 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2098 #if !defined(PETSC_USE_COMPLEX) 2099 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2100 #else 2101 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2102 #endif 2103 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2104 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2105 #else /* on missing GESVD */ 2106 /* SVD */ 2107 PetscInt max_n,min_n; 2108 max_n = max_size_of_constraint; 2109 min_n = max_constraints; 2110 if (max_size_of_constraint < max_constraints) { 2111 min_n = max_size_of_constraint; 2112 max_n = max_constraints; 2113 } 2114 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2115 #if defined(PETSC_USE_COMPLEX) 2116 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2117 #endif 2118 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2119 lwork = -1; 2120 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2121 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2122 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2123 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2124 #if !defined(PETSC_USE_COMPLEX) 2125 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2126 #else 2127 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2128 #endif 2129 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2130 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2131 #endif /* on missing GESVD */ 2132 /* Allocate optimal workspace */ 2133 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2134 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2135 } 2136 /* Now we can loop on constraining sets */ 2137 total_counts = 0; 2138 constraints_idxs_ptr[0] = 0; 2139 constraints_data_ptr[0] = 0; 2140 /* vertices */ 2141 if (n_vertices) { 2142 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2143 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2144 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2145 for (i=0;i<n_vertices;i++) { 2146 constraints_n[total_counts] = 1; 2147 constraints_data[total_counts] = 1.0; 2148 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2149 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2150 total_counts++; 2151 } 2152 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2153 PetscBool used_vertex; 2154 for (i=0;i<n_vertices;i++) { 2155 used_vertex = PETSC_FALSE; 2156 k = 0; 2157 while (!used_vertex && k<nnsp_size) { 2158 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2159 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2160 constraints_n[total_counts] = 1; 2161 constraints_idxs[total_counts] = is_indices[i]; 2162 constraints_data[total_counts] = 1.0; 2163 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2164 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2165 total_counts++; 2166 used_vertex = PETSC_TRUE; 2167 } 2168 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2169 k++; 2170 } 2171 } 2172 } 2173 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2174 n_vertices = total_counts; 2175 } 2176 2177 /* edges and faces */ 2178 total_counts_cc = total_counts; 2179 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2180 IS used_is; 2181 PetscBool idxs_copied = PETSC_FALSE; 2182 2183 if (ncc<n_ISForEdges) { 2184 used_is = ISForEdges[ncc]; 2185 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2186 } else { 2187 used_is = ISForFaces[ncc-n_ISForEdges]; 2188 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2189 } 2190 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2191 2192 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2193 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2194 /* change of basis should not be performed on local periodic nodes */ 2195 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2196 if (nnsp_has_cnst) { 2197 PetscScalar quad_value; 2198 2199 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2200 idxs_copied = PETSC_TRUE; 2201 2202 if (!pcbddc->use_nnsp_true) { 2203 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2204 } else { 2205 quad_value = 1.0; 2206 } 2207 for (j=0;j<size_of_constraint;j++) { 2208 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2209 } 2210 temp_constraints++; 2211 total_counts++; 2212 } 2213 for (k=0;k<nnsp_size;k++) { 2214 PetscReal real_value; 2215 PetscScalar *ptr_to_data; 2216 2217 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2218 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2219 for (j=0;j<size_of_constraint;j++) { 2220 ptr_to_data[j] = array[is_indices[j]]; 2221 } 2222 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2223 /* check if array is null on the connected component */ 2224 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2225 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2226 if (real_value > 0.0) { /* keep indices and values */ 2227 temp_constraints++; 2228 total_counts++; 2229 if (!idxs_copied) { 2230 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2231 idxs_copied = PETSC_TRUE; 2232 } 2233 } 2234 } 2235 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2236 valid_constraints = temp_constraints; 2237 if (!pcbddc->use_nnsp_true && temp_constraints) { 2238 if (temp_constraints == 1) { /* just normalize the constraint */ 2239 PetscScalar norm,*ptr_to_data; 2240 2241 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2242 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2243 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2244 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2245 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2246 } else { /* perform SVD */ 2247 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2248 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2249 2250 #if defined(PETSC_MISSING_LAPACK_GESVD) 2251 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2252 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2253 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2254 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2255 from that computed using LAPACKgesvd 2256 -> This is due to a different computation of eigenvectors in LAPACKheev 2257 -> The quality of the POD-computed basis will be the same */ 2258 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2259 /* Store upper triangular part of correlation matrix */ 2260 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2261 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2262 for (j=0;j<temp_constraints;j++) { 2263 for (k=0;k<j+1;k++) { 2264 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2265 } 2266 } 2267 /* compute eigenvalues and eigenvectors of correlation matrix */ 2268 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2269 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2270 #if !defined(PETSC_USE_COMPLEX) 2271 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2272 #else 2273 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2274 #endif 2275 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2276 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2277 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2278 j = 0; 2279 while (j < temp_constraints && singular_vals[j] < tol) j++; 2280 total_counts = total_counts-j; 2281 valid_constraints = temp_constraints-j; 2282 /* scale and copy POD basis into used quadrature memory */ 2283 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2284 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2285 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2286 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2287 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2288 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2289 if (j<temp_constraints) { 2290 PetscInt ii; 2291 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2292 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2293 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2294 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2295 for (k=0;k<temp_constraints-j;k++) { 2296 for (ii=0;ii<size_of_constraint;ii++) { 2297 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2298 } 2299 } 2300 } 2301 #else /* on missing GESVD */ 2302 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2303 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2304 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2305 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2306 #if !defined(PETSC_USE_COMPLEX) 2307 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2308 #else 2309 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2310 #endif 2311 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2312 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2313 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2314 k = temp_constraints; 2315 if (k > size_of_constraint) k = size_of_constraint; 2316 j = 0; 2317 while (j < k && singular_vals[k-j-1] < tol) j++; 2318 valid_constraints = k-j; 2319 total_counts = total_counts-temp_constraints+valid_constraints; 2320 #endif /* on missing GESVD */ 2321 } 2322 } 2323 /* update pointers information */ 2324 if (valid_constraints) { 2325 constraints_n[total_counts_cc] = valid_constraints; 2326 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2327 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2328 /* set change_of_basis flag */ 2329 if (boolforchange) { 2330 PetscBTSet(change_basis,total_counts_cc); 2331 } 2332 total_counts_cc++; 2333 } 2334 } 2335 /* free workspace */ 2336 if (!skip_lapack) { 2337 ierr = PetscFree(work);CHKERRQ(ierr); 2338 #if defined(PETSC_USE_COMPLEX) 2339 ierr = PetscFree(rwork);CHKERRQ(ierr); 2340 #endif 2341 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2342 #if defined(PETSC_MISSING_LAPACK_GESVD) 2343 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2344 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2345 #endif 2346 } 2347 for (k=0;k<nnsp_size;k++) { 2348 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2349 } 2350 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2351 /* free index sets of faces, edges and vertices */ 2352 for (i=0;i<n_ISForFaces;i++) { 2353 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2354 } 2355 if (n_ISForFaces) { 2356 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2357 } 2358 for (i=0;i<n_ISForEdges;i++) { 2359 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2360 } 2361 if (n_ISForEdges) { 2362 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2363 } 2364 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2365 } else { 2366 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2367 2368 total_counts = 0; 2369 n_vertices = 0; 2370 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2371 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2372 } 2373 max_constraints = 0; 2374 total_counts_cc = 0; 2375 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2376 total_counts += pcbddc->adaptive_constraints_n[i]; 2377 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2378 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2379 } 2380 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2381 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2382 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2383 constraints_data = pcbddc->adaptive_constraints_data; 2384 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2385 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2386 total_counts_cc = 0; 2387 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2388 if (pcbddc->adaptive_constraints_n[i]) { 2389 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2390 } 2391 } 2392 #if 0 2393 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2394 for (i=0;i<total_counts_cc;i++) { 2395 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2396 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2397 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2398 printf(" %d",constraints_idxs[j]); 2399 } 2400 printf("\n"); 2401 printf("number of cc: %d\n",constraints_n[i]); 2402 } 2403 for (i=0;i<n_vertices;i++) { 2404 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2405 } 2406 for (i=0;i<sub_schurs->n_subs;i++) { 2407 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2408 } 2409 #endif 2410 2411 max_size_of_constraint = 0; 2412 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2413 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2414 /* Change of basis */ 2415 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2416 if (pcbddc->use_change_of_basis) { 2417 for (i=0;i<sub_schurs->n_subs;i++) { 2418 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2419 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2420 } 2421 } 2422 } 2423 } 2424 pcbddc->local_primal_size = total_counts; 2425 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2426 2427 /* map constraints_idxs in boundary numbering */ 2428 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2429 if (i != constraints_idxs_ptr[total_counts_cc]) { 2430 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2431 } 2432 2433 /* Create constraint matrix */ 2434 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2435 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2436 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2437 2438 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2439 /* determine if a QR strategy is needed for change of basis */ 2440 qr_needed = PETSC_FALSE; 2441 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2442 total_primal_vertices=0; 2443 pcbddc->local_primal_size_cc = 0; 2444 for (i=0;i<total_counts_cc;i++) { 2445 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2446 if (size_of_constraint == 1) { 2447 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2448 pcbddc->local_primal_size_cc += 1; 2449 } else if (PetscBTLookup(change_basis,i)) { 2450 for (k=0;k<constraints_n[i];k++) { 2451 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2452 } 2453 pcbddc->local_primal_size_cc += constraints_n[i]; 2454 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2455 PetscBTSet(qr_needed_idx,i); 2456 qr_needed = PETSC_TRUE; 2457 } 2458 } else { 2459 pcbddc->local_primal_size_cc += 1; 2460 } 2461 } 2462 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2463 pcbddc->n_vertices = total_primal_vertices; 2464 /* permute indices in order to have a sorted set of vertices */ 2465 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2466 2467 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2468 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2469 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2470 2471 /* nonzero structure of constraint matrix */ 2472 /* and get reference dof for local constraints */ 2473 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2474 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2475 2476 j = total_primal_vertices; 2477 total_counts = total_primal_vertices; 2478 cum = total_primal_vertices; 2479 for (i=n_vertices;i<total_counts_cc;i++) { 2480 if (!PetscBTLookup(change_basis,i)) { 2481 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2482 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2483 cum++; 2484 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2485 for (k=0;k<constraints_n[i];k++) { 2486 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2487 nnz[j+k] = size_of_constraint; 2488 } 2489 j += constraints_n[i]; 2490 } 2491 } 2492 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2493 ierr = PetscFree(nnz);CHKERRQ(ierr); 2494 2495 /* set values in constraint matrix */ 2496 for (i=0;i<total_primal_vertices;i++) { 2497 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2498 } 2499 total_counts = total_primal_vertices; 2500 for (i=n_vertices;i<total_counts_cc;i++) { 2501 if (!PetscBTLookup(change_basis,i)) { 2502 PetscInt *cols; 2503 2504 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2505 cols = constraints_idxs+constraints_idxs_ptr[i]; 2506 for (k=0;k<constraints_n[i];k++) { 2507 PetscInt row = total_counts+k; 2508 PetscScalar *vals; 2509 2510 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2511 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2512 } 2513 total_counts += constraints_n[i]; 2514 } 2515 } 2516 /* assembling */ 2517 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2518 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2519 2520 /* 2521 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2522 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2523 */ 2524 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2525 if (pcbddc->use_change_of_basis) { 2526 /* dual and primal dofs on a single cc */ 2527 PetscInt dual_dofs,primal_dofs; 2528 /* working stuff for GEQRF */ 2529 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2530 PetscBLASInt lqr_work; 2531 /* working stuff for UNGQR */ 2532 PetscScalar *gqr_work,lgqr_work_t; 2533 PetscBLASInt lgqr_work; 2534 /* working stuff for TRTRS */ 2535 PetscScalar *trs_rhs; 2536 PetscBLASInt Blas_NRHS; 2537 /* pointers for values insertion into change of basis matrix */ 2538 PetscInt *start_rows,*start_cols; 2539 PetscScalar *start_vals; 2540 /* working stuff for values insertion */ 2541 PetscBT is_primal; 2542 PetscInt *aux_primal_numbering_B; 2543 /* matrix sizes */ 2544 PetscInt global_size,local_size; 2545 /* temporary change of basis */ 2546 Mat localChangeOfBasisMatrix; 2547 /* extra space for debugging */ 2548 PetscScalar *dbg_work; 2549 2550 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2551 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2552 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2553 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2554 /* nonzeros for local mat */ 2555 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2556 for (i=0;i<pcis->n;i++) nnz[i]=1; 2557 for (i=n_vertices;i<total_counts_cc;i++) { 2558 if (PetscBTLookup(change_basis,i)) { 2559 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2560 if (PetscBTLookup(qr_needed_idx,i)) { 2561 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2562 } else { 2563 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2564 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2565 } 2566 } 2567 } 2568 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2569 ierr = PetscFree(nnz);CHKERRQ(ierr); 2570 /* Set initial identity in the matrix */ 2571 for (i=0;i<pcis->n;i++) { 2572 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2573 } 2574 2575 if (pcbddc->dbg_flag) { 2576 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2577 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2578 } 2579 2580 2581 /* Now we loop on the constraints which need a change of basis */ 2582 /* 2583 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2584 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2585 2586 Basic blocks of change of basis matrix T computed by 2587 2588 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2589 2590 | 1 0 ... 0 s_1/S | 2591 | 0 1 ... 0 s_2/S | 2592 | ... | 2593 | 0 ... 1 s_{n-1}/S | 2594 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2595 2596 with S = \sum_{i=1}^n s_i^2 2597 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2598 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2599 2600 - QR decomposition of constraints otherwise 2601 */ 2602 if (qr_needed) { 2603 /* space to store Q */ 2604 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2605 /* first we issue queries for optimal work */ 2606 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2607 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2608 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2609 lqr_work = -1; 2610 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2611 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2612 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2613 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2614 lgqr_work = -1; 2615 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2616 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2617 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2618 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2619 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2620 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2621 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2622 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2623 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2624 /* array to store scaling factors for reflectors */ 2625 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2626 /* array to store rhs and solution of triangular solver */ 2627 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2628 /* allocating workspace for check */ 2629 if (pcbddc->dbg_flag) { 2630 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2631 } 2632 } 2633 /* array to store whether a node is primal or not */ 2634 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2635 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2636 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2637 if (i != total_primal_vertices) { 2638 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2639 } 2640 for (i=0;i<total_primal_vertices;i++) { 2641 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2642 } 2643 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2644 2645 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2646 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2647 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2648 if (PetscBTLookup(change_basis,total_counts)) { 2649 /* get constraint info */ 2650 primal_dofs = constraints_n[total_counts]; 2651 dual_dofs = size_of_constraint-primal_dofs; 2652 2653 if (pcbddc->dbg_flag) { 2654 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2655 } 2656 2657 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2658 2659 /* copy quadrature constraints for change of basis check */ 2660 if (pcbddc->dbg_flag) { 2661 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2662 } 2663 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2664 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2665 2666 /* compute QR decomposition of constraints */ 2667 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2668 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2669 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2670 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2671 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2672 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2673 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2674 2675 /* explictly compute R^-T */ 2676 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2677 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2678 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2679 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2680 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2681 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2682 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2683 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2684 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2685 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2686 2687 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2688 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2689 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2690 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2691 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2692 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2693 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2694 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2695 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2696 2697 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2698 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2699 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2700 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2701 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2702 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2703 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2704 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2705 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2706 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2707 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2708 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2709 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2710 2711 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2712 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2713 /* insert cols for primal dofs */ 2714 for (j=0;j<primal_dofs;j++) { 2715 start_vals = &qr_basis[j*size_of_constraint]; 2716 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2717 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2718 } 2719 /* insert cols for dual dofs */ 2720 for (j=0,k=0;j<dual_dofs;k++) { 2721 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2722 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2723 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2724 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2725 j++; 2726 } 2727 } 2728 2729 /* check change of basis */ 2730 if (pcbddc->dbg_flag) { 2731 PetscInt ii,jj; 2732 PetscBool valid_qr=PETSC_TRUE; 2733 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2734 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2735 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2736 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2737 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2738 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2739 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2740 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2741 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2742 for (jj=0;jj<size_of_constraint;jj++) { 2743 for (ii=0;ii<primal_dofs;ii++) { 2744 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2745 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2746 } 2747 } 2748 if (!valid_qr) { 2749 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2750 for (jj=0;jj<size_of_constraint;jj++) { 2751 for (ii=0;ii<primal_dofs;ii++) { 2752 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2753 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2754 } 2755 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2756 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2757 } 2758 } 2759 } 2760 } else { 2761 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2762 } 2763 } 2764 } else { /* simple transformation block */ 2765 PetscInt row,col; 2766 PetscScalar val,norm; 2767 2768 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2769 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2770 for (j=0;j<size_of_constraint;j++) { 2771 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2772 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2773 if (!PetscBTLookup(is_primal,row_B)) { 2774 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2775 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2776 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2777 } else { 2778 for (k=0;k<size_of_constraint;k++) { 2779 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2780 if (row != col) { 2781 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2782 } else { 2783 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2784 } 2785 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2786 } 2787 } 2788 } 2789 if (pcbddc->dbg_flag) { 2790 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2791 } 2792 } 2793 } else { 2794 if (pcbddc->dbg_flag) { 2795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2796 } 2797 } 2798 } 2799 2800 /* free workspace */ 2801 if (qr_needed) { 2802 if (pcbddc->dbg_flag) { 2803 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2804 } 2805 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2806 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2807 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2808 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2809 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2810 } 2811 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2812 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2813 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2814 2815 /* assembling of global change of variable */ 2816 { 2817 Mat tmat; 2818 PetscInt bs; 2819 2820 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2821 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2822 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2823 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2824 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2825 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2826 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2827 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2828 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2829 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2830 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2831 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2832 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2833 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2834 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2835 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2836 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2837 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2838 } 2839 /* check */ 2840 if (pcbddc->dbg_flag) { 2841 PetscReal error; 2842 Vec x,x_change; 2843 2844 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2845 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2846 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2847 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2848 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2849 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2850 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2851 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2852 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2853 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2854 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2855 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2856 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2857 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2858 ierr = VecDestroy(&x);CHKERRQ(ierr); 2859 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2860 } 2861 2862 /* adapt sub_schurs computed (if any) */ 2863 if (pcbddc->use_deluxe_scaling) { 2864 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2865 if (sub_schurs->S_Ej_all) { 2866 Mat S_new,tmat; 2867 IS is_all_N; 2868 2869 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2870 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2871 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2872 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2873 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2874 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2875 sub_schurs->S_Ej_all = S_new; 2876 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2877 if (sub_schurs->sum_S_Ej_all) { 2878 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2879 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2880 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2881 sub_schurs->sum_S_Ej_all = S_new; 2882 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2883 } 2884 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2885 } 2886 } 2887 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2888 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2889 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2890 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2891 } 2892 2893 /* set up change of basis context */ 2894 if (pcbddc->ChangeOfBasisMatrix) { 2895 PCBDDCChange_ctx change_ctx; 2896 2897 if (!pcbddc->new_global_mat) { 2898 PetscInt global_size,local_size; 2899 2900 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2901 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2902 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2903 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2904 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2905 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2906 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2907 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2908 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2909 } else { 2910 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2911 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2912 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2913 } 2914 if (!pcbddc->user_ChangeOfBasisMatrix) { 2915 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2916 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2917 } else { 2918 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2919 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2920 } 2921 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2922 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2923 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2924 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2925 } 2926 2927 /* check if a new primal space has been introduced */ 2928 pcbddc->new_primal_space_local = PETSC_TRUE; 2929 if (olocal_primal_size == pcbddc->local_primal_size) { 2930 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2931 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2932 if (!pcbddc->new_primal_space_local) { 2933 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2934 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2935 } 2936 } 2937 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2938 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2939 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2940 2941 /* flush dbg viewer */ 2942 if (pcbddc->dbg_flag) { 2943 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2944 } 2945 2946 /* free workspace */ 2947 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2948 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2949 if (!pcbddc->adaptive_selection) { 2950 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2951 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2952 } else { 2953 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2954 pcbddc->adaptive_constraints_idxs_ptr, 2955 pcbddc->adaptive_constraints_data_ptr, 2956 pcbddc->adaptive_constraints_idxs, 2957 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2958 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2959 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2960 } 2961 PetscFunctionReturn(0); 2962 } 2963 2964 #undef __FUNCT__ 2965 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2966 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2967 { 2968 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2969 PC_IS *pcis = (PC_IS*)pc->data; 2970 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2971 PetscInt ierr,i,vertex_size,N; 2972 PetscViewer viewer=pcbddc->dbg_viewer; 2973 2974 PetscFunctionBegin; 2975 /* Reset previously computed graph */ 2976 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2977 /* Init local Graph struct */ 2978 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 2979 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 2980 2981 /* Check validity of the csr graph passed in by the user */ 2982 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2983 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2984 } 2985 2986 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2987 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2988 PetscInt *xadj,*adjncy; 2989 PetscInt nvtxs; 2990 PetscBool flg_row=PETSC_FALSE; 2991 2992 if (pcbddc->use_local_adj) { 2993 2994 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2995 if (flg_row) { 2996 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2997 pcbddc->computed_rowadj = PETSC_TRUE; 2998 } 2999 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3000 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3001 IS is_dummy; 3002 ISLocalToGlobalMapping l2gmap_dummy; 3003 PetscInt j,sum; 3004 PetscInt *cxadj,*cadjncy; 3005 const PetscInt *idxs; 3006 PCBDDCGraph graph; 3007 PetscBT is_on_boundary; 3008 3009 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3010 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3011 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3012 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3013 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3014 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3015 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3016 if (flg_row) { 3017 graph->xadj = xadj; 3018 graph->adjncy = adjncy; 3019 } 3020 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3021 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3022 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3023 3024 if (pcbddc->dbg_flag) { 3025 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3026 for (i=0;i<graph->ncc;i++) { 3027 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3028 } 3029 } 3030 3031 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3032 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3033 for (i=0;i<pcis->n_B;i++) { 3034 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3035 } 3036 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3037 3038 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3039 sum = 0; 3040 for (i=0;i<graph->ncc;i++) { 3041 PetscInt sizecc = 0; 3042 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3043 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3044 sizecc++; 3045 } 3046 } 3047 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3048 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3049 cxadj[graph->queue[j]] = sizecc; 3050 } 3051 } 3052 sum += sizecc*sizecc; 3053 } 3054 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3055 sum = 0; 3056 for (i=0;i<pcis->n;i++) { 3057 PetscInt temp = cxadj[i]; 3058 cxadj[i] = sum; 3059 sum += temp; 3060 } 3061 cxadj[pcis->n] = sum; 3062 for (i=0;i<graph->ncc;i++) { 3063 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3064 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3065 PetscInt k,sizecc = 0; 3066 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3067 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3068 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3069 sizecc++; 3070 } 3071 } 3072 } 3073 } 3074 } 3075 if (sum) { 3076 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3077 } else { 3078 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3079 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3080 } 3081 graph->xadj = 0; 3082 graph->adjncy = 0; 3083 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3084 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3085 } 3086 } 3087 if (pcbddc->dbg_flag) { 3088 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3089 } 3090 3091 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3092 vertex_size = 1; 3093 if (pcbddc->user_provided_isfordofs) { 3094 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3095 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3096 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3097 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3098 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3099 } 3100 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3101 pcbddc->n_ISForDofs = 0; 3102 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3103 } 3104 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3105 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3106 } else { 3107 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3108 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3109 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3110 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3111 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3112 } 3113 } 3114 } 3115 3116 /* Setup of Graph */ 3117 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3118 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3119 } 3120 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3121 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3122 } 3123 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3124 3125 /* Graph's connected components analysis */ 3126 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3127 3128 /* print some info to stdout */ 3129 if (pcbddc->dbg_flag) { 3130 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3131 } 3132 3133 /* mark topography has done */ 3134 pcbddc->recompute_topography = PETSC_FALSE; 3135 PetscFunctionReturn(0); 3136 } 3137 3138 #undef __FUNCT__ 3139 #define __FUNCT__ "PCBDDCSubsetNumbering" 3140 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3141 { 3142 Vec local_vec,global_vec; 3143 IS seqis,paris; 3144 VecScatter scatter_ctx; 3145 PetscScalar *array; 3146 PetscInt *temp_global_dofs; 3147 PetscScalar globalsum; 3148 PetscInt i,j,s; 3149 PetscInt nlocals,first_index,old_index,max_local,max_global; 3150 PetscMPIInt rank_prec_comm,size_prec_comm; 3151 PetscInt *dof_sizes,*dof_displs; 3152 PetscBool first_found; 3153 PetscErrorCode ierr; 3154 3155 PetscFunctionBegin; 3156 /* mpi buffers */ 3157 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3158 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3159 j = ( !rank_prec_comm ? size_prec_comm : 0); 3160 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3161 /* get maximum size of subset */ 3162 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3163 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3164 max_local = 0; 3165 for (i=0;i<n_local_dofs;i++) { 3166 if (max_local < temp_global_dofs[i] ) { 3167 max_local = temp_global_dofs[i]; 3168 } 3169 } 3170 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3171 max_global++; 3172 max_local = 0; 3173 for (i=0;i<n_local_dofs;i++) { 3174 if (max_local < local_dofs[i] ) { 3175 max_local = local_dofs[i]; 3176 } 3177 } 3178 max_local++; 3179 /* allocate workspace */ 3180 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3181 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3182 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3183 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3184 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3185 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3186 /* create scatter */ 3187 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3188 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3189 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3190 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3191 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3192 /* init array */ 3193 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3194 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3195 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3196 if (local_dofs_mult) { 3197 for (i=0;i<n_local_dofs;i++) { 3198 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3199 } 3200 } else { 3201 for (i=0;i<n_local_dofs;i++) { 3202 array[local_dofs[i]]=1.0; 3203 } 3204 } 3205 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3206 /* scatter into global vec and get total number of global dofs */ 3207 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3208 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3209 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3210 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3211 /* Fill global_vec with cumulative function for global numbering */ 3212 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3213 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3214 nlocals = 0; 3215 first_index = -1; 3216 first_found = PETSC_FALSE; 3217 for (i=0;i<s;i++) { 3218 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3219 first_found = PETSC_TRUE; 3220 first_index = i; 3221 } 3222 nlocals += (PetscInt)PetscRealPart(array[i]); 3223 } 3224 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3225 if (!rank_prec_comm) { 3226 dof_displs[0]=0; 3227 for (i=1;i<size_prec_comm;i++) { 3228 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3229 } 3230 } 3231 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3232 if (first_found) { 3233 array[first_index] += (PetscScalar)nlocals; 3234 old_index = first_index; 3235 for (i=first_index+1;i<s;i++) { 3236 if (PetscRealPart(array[i]) > 0.1) { 3237 array[i] += array[old_index]; 3238 old_index = i; 3239 } 3240 } 3241 } 3242 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3243 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3244 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3245 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3246 /* get global ordering of local dofs */ 3247 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3248 if (local_dofs_mult) { 3249 for (i=0;i<n_local_dofs;i++) { 3250 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3251 } 3252 } else { 3253 for (i=0;i<n_local_dofs;i++) { 3254 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3255 } 3256 } 3257 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3258 /* free workspace */ 3259 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3260 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3261 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3262 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3263 /* return pointer to global ordering of local dofs */ 3264 *global_numbering_subset = temp_global_dofs; 3265 PetscFunctionReturn(0); 3266 } 3267 3268 #undef __FUNCT__ 3269 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3270 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3271 { 3272 PetscInt i,j; 3273 PetscScalar *alphas; 3274 PetscErrorCode ierr; 3275 3276 PetscFunctionBegin; 3277 /* this implements stabilized Gram-Schmidt */ 3278 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3279 for (i=0;i<n;i++) { 3280 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3281 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3282 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3283 } 3284 ierr = PetscFree(alphas);CHKERRQ(ierr); 3285 PetscFunctionReturn(0); 3286 } 3287 3288 #undef __FUNCT__ 3289 #define __FUNCT__ "MatISGetSubassemblingPattern" 3290 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3291 { 3292 Mat subdomain_adj; 3293 IS new_ranks,ranks_send_to; 3294 MatPartitioning partitioner; 3295 Mat_IS *matis; 3296 PetscInt n_neighs,*neighs,*n_shared,**shared; 3297 PetscInt prank; 3298 PetscMPIInt size,rank,color; 3299 PetscInt *xadj,*adjncy,*oldranks; 3300 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3301 PetscInt i,local_size,threshold=0; 3302 PetscErrorCode ierr; 3303 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3304 PetscSubcomm subcomm; 3305 3306 PetscFunctionBegin; 3307 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3308 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3309 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3310 3311 /* Get info on mapping */ 3312 matis = (Mat_IS*)(mat->data); 3313 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3314 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3315 3316 /* build local CSR graph of subdomains' connectivity */ 3317 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3318 xadj[0] = 0; 3319 xadj[1] = PetscMax(n_neighs-1,0); 3320 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3321 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3322 3323 if (threshold) { 3324 PetscInt xadj_count = 0; 3325 for (i=1;i<n_neighs;i++) { 3326 if (n_shared[i] > threshold) { 3327 adjncy[xadj_count] = neighs[i]; 3328 adjncy_wgt[xadj_count] = n_shared[i]; 3329 xadj_count++; 3330 } 3331 } 3332 xadj[1] = xadj_count; 3333 } else { 3334 if (xadj[1]) { 3335 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3336 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3337 } 3338 } 3339 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3340 if (use_square) { 3341 for (i=0;i<xadj[1];i++) { 3342 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3343 } 3344 } 3345 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3346 3347 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3348 3349 /* 3350 Restrict work on active processes only. 3351 */ 3352 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3353 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3354 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3355 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3356 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3357 if (color) { 3358 ierr = PetscFree(xadj);CHKERRQ(ierr); 3359 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3360 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3361 } else { 3362 PetscInt coarsening_ratio; 3363 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3364 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3365 prank = rank; 3366 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3367 /* 3368 for (i=0;i<size;i++) { 3369 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3370 } 3371 */ 3372 for (i=0;i<xadj[1];i++) { 3373 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3374 } 3375 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3376 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3377 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3378 3379 /* Partition */ 3380 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3381 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3382 if (use_vwgt) { 3383 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3384 v_wgt[0] = local_size; 3385 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3386 } 3387 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3388 coarsening_ratio = size/n_subdomains; 3389 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3390 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3391 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3392 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3393 3394 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3395 if (contiguous) { 3396 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3397 } else { 3398 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3399 } 3400 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3401 /* clean up */ 3402 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3403 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3404 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3405 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3406 } 3407 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3408 3409 /* assemble parallel IS for sends */ 3410 i = 1; 3411 if (color) i=0; 3412 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3413 3414 /* get back IS */ 3415 *is_sends = ranks_send_to; 3416 PetscFunctionReturn(0); 3417 } 3418 3419 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3420 3421 #undef __FUNCT__ 3422 #define __FUNCT__ "MatISSubassemble" 3423 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3424 { 3425 Mat local_mat; 3426 Mat_IS *matis; 3427 IS is_sends_internal; 3428 PetscInt rows,cols,new_local_rows; 3429 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3430 PetscBool ismatis,isdense,newisdense,destroy_mat; 3431 ISLocalToGlobalMapping l2gmap; 3432 PetscInt* l2gmap_indices; 3433 const PetscInt* is_indices; 3434 MatType new_local_type; 3435 /* buffers */ 3436 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3437 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3438 PetscInt *recv_buffer_idxs_local; 3439 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3440 /* MPI */ 3441 MPI_Comm comm,comm_n; 3442 PetscSubcomm subcomm; 3443 PetscMPIInt n_sends,n_recvs,commsize; 3444 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3445 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3446 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3447 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3448 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3449 PetscErrorCode ierr; 3450 3451 PetscFunctionBegin; 3452 /* TODO: add missing checks */ 3453 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3454 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3455 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3456 PetscValidLogicalCollectiveInt(mat,nis,7); 3457 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3458 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3459 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3460 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3461 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3462 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3463 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3464 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3465 PetscInt mrows,mcols,mnrows,mncols; 3466 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3467 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3468 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3469 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3470 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3471 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3472 } 3473 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3474 PetscValidLogicalCollectiveInt(mat,bs,0); 3475 /* prepare IS for sending if not provided */ 3476 if (!is_sends) { 3477 PetscBool pcontig = PETSC_TRUE; 3478 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3479 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3480 } else { 3481 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3482 is_sends_internal = is_sends; 3483 } 3484 3485 /* get pointer of MATIS data */ 3486 matis = (Mat_IS*)mat->data; 3487 3488 /* get comm */ 3489 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3490 3491 /* compute number of sends */ 3492 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3493 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3494 3495 /* compute number of receives */ 3496 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3497 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3498 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3499 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3500 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3501 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3502 ierr = PetscFree(iflags);CHKERRQ(ierr); 3503 3504 /* restrict comm if requested */ 3505 subcomm = 0; 3506 destroy_mat = PETSC_FALSE; 3507 if (restrict_comm) { 3508 PetscMPIInt color,subcommsize; 3509 3510 color = 0; 3511 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3512 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3513 subcommsize = commsize - subcommsize; 3514 /* check if reuse has been requested */ 3515 if (reuse == MAT_REUSE_MATRIX) { 3516 if (*mat_n) { 3517 PetscMPIInt subcommsize2; 3518 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3519 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3520 comm_n = PetscObjectComm((PetscObject)*mat_n); 3521 } else { 3522 comm_n = PETSC_COMM_SELF; 3523 } 3524 } else { /* MAT_INITIAL_MATRIX */ 3525 PetscMPIInt rank; 3526 3527 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3528 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3529 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3530 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3531 comm_n = PetscSubcommChild(subcomm); 3532 } 3533 /* flag to destroy *mat_n if not significative */ 3534 if (color) destroy_mat = PETSC_TRUE; 3535 } else { 3536 comm_n = comm; 3537 } 3538 3539 /* prepare send/receive buffers */ 3540 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3541 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3542 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3543 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3544 if (nis) { 3545 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3546 } 3547 3548 /* Get data from local matrices */ 3549 if (!isdense) { 3550 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3551 /* TODO: See below some guidelines on how to prepare the local buffers */ 3552 /* 3553 send_buffer_vals should contain the raw values of the local matrix 3554 send_buffer_idxs should contain: 3555 - MatType_PRIVATE type 3556 - PetscInt size_of_l2gmap 3557 - PetscInt global_row_indices[size_of_l2gmap] 3558 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3559 */ 3560 } else { 3561 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3562 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3563 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3564 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3565 send_buffer_idxs[1] = i; 3566 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3567 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3568 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3569 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3570 for (i=0;i<n_sends;i++) { 3571 ilengths_vals[is_indices[i]] = len*len; 3572 ilengths_idxs[is_indices[i]] = len+2; 3573 } 3574 } 3575 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3576 /* additional is (if any) */ 3577 if (nis) { 3578 PetscMPIInt psum; 3579 PetscInt j; 3580 for (j=0,psum=0;j<nis;j++) { 3581 PetscInt plen; 3582 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3583 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3584 psum += len+1; /* indices + lenght */ 3585 } 3586 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3587 for (j=0,psum=0;j<nis;j++) { 3588 PetscInt plen; 3589 const PetscInt *is_array_idxs; 3590 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3591 send_buffer_idxs_is[psum] = plen; 3592 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3593 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3594 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3595 psum += plen+1; /* indices + lenght */ 3596 } 3597 for (i=0;i<n_sends;i++) { 3598 ilengths_idxs_is[is_indices[i]] = psum; 3599 } 3600 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3601 } 3602 3603 buf_size_idxs = 0; 3604 buf_size_vals = 0; 3605 buf_size_idxs_is = 0; 3606 for (i=0;i<n_recvs;i++) { 3607 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3608 buf_size_vals += (PetscInt)olengths_vals[i]; 3609 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3610 } 3611 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3612 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3613 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3614 3615 /* get new tags for clean communications */ 3616 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3617 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3618 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3619 3620 /* allocate for requests */ 3621 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3622 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3623 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3624 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3625 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3626 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3627 3628 /* communications */ 3629 ptr_idxs = recv_buffer_idxs; 3630 ptr_vals = recv_buffer_vals; 3631 ptr_idxs_is = recv_buffer_idxs_is; 3632 for (i=0;i<n_recvs;i++) { 3633 source_dest = onodes[i]; 3634 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3635 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3636 ptr_idxs += olengths_idxs[i]; 3637 ptr_vals += olengths_vals[i]; 3638 if (nis) { 3639 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3640 ptr_idxs_is += olengths_idxs_is[i]; 3641 } 3642 } 3643 for (i=0;i<n_sends;i++) { 3644 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3645 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3646 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3647 if (nis) { 3648 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3649 } 3650 } 3651 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3652 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3653 3654 /* assemble new l2g map */ 3655 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3656 ptr_idxs = recv_buffer_idxs; 3657 new_local_rows = 0; 3658 for (i=0;i<n_recvs;i++) { 3659 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3660 ptr_idxs += olengths_idxs[i]; 3661 } 3662 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3663 ptr_idxs = recv_buffer_idxs; 3664 new_local_rows = 0; 3665 for (i=0;i<n_recvs;i++) { 3666 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3667 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3668 ptr_idxs += olengths_idxs[i]; 3669 } 3670 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3671 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3672 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3673 3674 /* infer new local matrix type from received local matrices type */ 3675 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3676 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3677 if (n_recvs) { 3678 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3679 ptr_idxs = recv_buffer_idxs; 3680 for (i=0;i<n_recvs;i++) { 3681 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3682 new_local_type_private = MATAIJ_PRIVATE; 3683 break; 3684 } 3685 ptr_idxs += olengths_idxs[i]; 3686 } 3687 switch (new_local_type_private) { 3688 case MATDENSE_PRIVATE: 3689 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3690 new_local_type = MATSEQAIJ; 3691 bs = 1; 3692 } else { /* if I receive only 1 dense matrix */ 3693 new_local_type = MATSEQDENSE; 3694 bs = 1; 3695 } 3696 break; 3697 case MATAIJ_PRIVATE: 3698 new_local_type = MATSEQAIJ; 3699 bs = 1; 3700 break; 3701 case MATBAIJ_PRIVATE: 3702 new_local_type = MATSEQBAIJ; 3703 break; 3704 case MATSBAIJ_PRIVATE: 3705 new_local_type = MATSEQSBAIJ; 3706 break; 3707 default: 3708 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3709 break; 3710 } 3711 } else { /* by default, new_local_type is seqdense */ 3712 new_local_type = MATSEQDENSE; 3713 bs = 1; 3714 } 3715 3716 /* create MATIS object if needed */ 3717 if (reuse == MAT_INITIAL_MATRIX) { 3718 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3719 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3720 } else { 3721 /* it also destroys the local matrices */ 3722 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3723 } 3724 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3725 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3726 3727 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3728 3729 /* Global to local map of received indices */ 3730 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3731 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3732 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3733 3734 /* restore attributes -> type of incoming data and its size */ 3735 buf_size_idxs = 0; 3736 for (i=0;i<n_recvs;i++) { 3737 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3738 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3739 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3740 } 3741 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3742 3743 /* set preallocation */ 3744 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3745 if (!newisdense) { 3746 PetscInt *new_local_nnz=0; 3747 3748 ptr_vals = recv_buffer_vals; 3749 ptr_idxs = recv_buffer_idxs_local; 3750 if (n_recvs) { 3751 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3752 } 3753 for (i=0;i<n_recvs;i++) { 3754 PetscInt j; 3755 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3756 for (j=0;j<*(ptr_idxs+1);j++) { 3757 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3758 } 3759 } else { 3760 /* TODO */ 3761 } 3762 ptr_idxs += olengths_idxs[i]; 3763 } 3764 if (new_local_nnz) { 3765 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3766 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3767 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3768 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3769 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3770 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3771 } else { 3772 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3773 } 3774 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3775 } else { 3776 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3777 } 3778 3779 /* set values */ 3780 ptr_vals = recv_buffer_vals; 3781 ptr_idxs = recv_buffer_idxs_local; 3782 for (i=0;i<n_recvs;i++) { 3783 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3784 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3785 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3786 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3787 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3788 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3789 } else { 3790 /* TODO */ 3791 } 3792 ptr_idxs += olengths_idxs[i]; 3793 ptr_vals += olengths_vals[i]; 3794 } 3795 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3796 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3797 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3798 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3799 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3800 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3801 3802 #if 0 3803 if (!restrict_comm) { /* check */ 3804 Vec lvec,rvec; 3805 PetscReal infty_error; 3806 3807 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3808 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3809 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3810 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3811 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3812 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3813 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3814 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3815 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3816 } 3817 #endif 3818 3819 /* assemble new additional is (if any) */ 3820 if (nis) { 3821 PetscInt **temp_idxs,*count_is,j,psum; 3822 3823 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3824 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3825 ptr_idxs = recv_buffer_idxs_is; 3826 psum = 0; 3827 for (i=0;i<n_recvs;i++) { 3828 for (j=0;j<nis;j++) { 3829 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3830 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3831 psum += plen; 3832 ptr_idxs += plen+1; /* shift pointer to received data */ 3833 } 3834 } 3835 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3836 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3837 for (i=1;i<nis;i++) { 3838 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3839 } 3840 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3841 ptr_idxs = recv_buffer_idxs_is; 3842 for (i=0;i<n_recvs;i++) { 3843 for (j=0;j<nis;j++) { 3844 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3845 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3846 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3847 ptr_idxs += plen+1; /* shift pointer to received data */ 3848 } 3849 } 3850 for (i=0;i<nis;i++) { 3851 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3852 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3853 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3854 } 3855 ierr = PetscFree(count_is);CHKERRQ(ierr); 3856 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3857 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3858 } 3859 /* free workspace */ 3860 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3861 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3862 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3863 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3864 if (isdense) { 3865 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3866 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3867 } else { 3868 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3869 } 3870 if (nis) { 3871 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3872 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3873 } 3874 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3875 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3876 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3877 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3878 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3879 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3880 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3881 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3882 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3883 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3884 ierr = PetscFree(onodes);CHKERRQ(ierr); 3885 if (nis) { 3886 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3887 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3888 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3889 } 3890 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3891 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3892 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3893 for (i=0;i<nis;i++) { 3894 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3895 } 3896 } 3897 PetscFunctionReturn(0); 3898 } 3899 3900 /* temporary hack into ksp private data structure */ 3901 #include <petsc/private/kspimpl.h> 3902 3903 #undef __FUNCT__ 3904 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3905 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3906 { 3907 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3908 PC_IS *pcis = (PC_IS*)pc->data; 3909 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3910 MatNullSpace CoarseNullSpace=NULL; 3911 ISLocalToGlobalMapping coarse_islg; 3912 IS coarse_is,*isarray; 3913 PetscInt i,im_active=-1,active_procs=-1; 3914 PetscInt nis,nisdofs,nisneu; 3915 PC pc_temp; 3916 PCType coarse_pc_type; 3917 KSPType coarse_ksp_type; 3918 PetscBool multilevel_requested,multilevel_allowed; 3919 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3920 Mat t_coarse_mat_is; 3921 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3922 PetscMPIInt all_procs; 3923 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3924 PetscBool compute_vecs = PETSC_FALSE; 3925 PetscScalar *array; 3926 PetscErrorCode ierr; 3927 3928 PetscFunctionBegin; 3929 /* Assign global numbering to coarse dofs */ 3930 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3931 PetscInt ocoarse_size; 3932 compute_vecs = PETSC_TRUE; 3933 ocoarse_size = pcbddc->coarse_size; 3934 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3935 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3936 /* see if we can avoid some work */ 3937 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3938 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3939 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3940 coarse_reuse = PETSC_FALSE; 3941 } else { /* we can safely reuse already computed coarse matrix */ 3942 coarse_reuse = PETSC_TRUE; 3943 } 3944 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3945 coarse_reuse = PETSC_FALSE; 3946 } 3947 /* reset any subassembling information */ 3948 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3949 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3950 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3951 coarse_reuse = PETSC_TRUE; 3952 } 3953 3954 /* count "active" (i.e. with positive local size) and "void" processes */ 3955 im_active = !!(pcis->n); 3956 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3957 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3958 void_procs = all_procs-active_procs; 3959 csin_type_simple = PETSC_TRUE; 3960 redist = PETSC_FALSE; 3961 if (pcbddc->current_level && void_procs) { 3962 csin_ml = PETSC_TRUE; 3963 ncoarse_ml = void_procs; 3964 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3965 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3966 csin_ds = PETSC_TRUE; 3967 ncoarse_ds = pcbddc->redistribute_coarse; 3968 redist = PETSC_TRUE; 3969 } else { 3970 csin_ds = PETSC_TRUE; 3971 ncoarse_ds = active_procs; 3972 redist = PETSC_TRUE; 3973 } 3974 } else { 3975 csin_ml = PETSC_FALSE; 3976 ncoarse_ml = all_procs; 3977 if (void_procs) { 3978 csin_ds = PETSC_TRUE; 3979 ncoarse_ds = void_procs; 3980 csin_type_simple = PETSC_FALSE; 3981 } else { 3982 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3983 csin_ds = PETSC_TRUE; 3984 ncoarse_ds = pcbddc->redistribute_coarse; 3985 redist = PETSC_TRUE; 3986 } else { 3987 csin_ds = PETSC_FALSE; 3988 ncoarse_ds = all_procs; 3989 } 3990 } 3991 } 3992 3993 /* 3994 test if we can go multilevel: three conditions must be satisfied: 3995 - we have not exceeded the number of levels requested 3996 - we can actually subassemble the active processes 3997 - we can find a suitable number of MPI processes where we can place the subassembled problem 3998 */ 3999 multilevel_allowed = PETSC_FALSE; 4000 multilevel_requested = PETSC_FALSE; 4001 if (pcbddc->current_level < pcbddc->max_levels) { 4002 multilevel_requested = PETSC_TRUE; 4003 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4004 multilevel_allowed = PETSC_FALSE; 4005 } else { 4006 multilevel_allowed = PETSC_TRUE; 4007 } 4008 } 4009 /* determine number of process partecipating to coarse solver */ 4010 if (multilevel_allowed) { 4011 ncoarse = ncoarse_ml; 4012 csin = csin_ml; 4013 redist = PETSC_FALSE; 4014 } else { 4015 ncoarse = ncoarse_ds; 4016 csin = csin_ds; 4017 } 4018 4019 /* creates temporary l2gmap and IS for coarse indexes */ 4020 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4021 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4022 4023 /* creates temporary MATIS object for coarse matrix */ 4024 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4025 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4026 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4027 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4028 #if 0 4029 { 4030 PetscViewer viewer; 4031 char filename[256]; 4032 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4033 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4034 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4035 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4036 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4037 } 4038 #endif 4039 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4040 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4041 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4042 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4043 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4044 4045 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4046 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4047 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4048 const PetscInt *idxs; 4049 ISLocalToGlobalMapping tmap; 4050 4051 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4052 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4053 /* allocate space for temporary storage */ 4054 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4055 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4056 /* allocate for IS array */ 4057 nisdofs = pcbddc->n_ISForDofsLocal; 4058 nisneu = !!pcbddc->NeumannBoundariesLocal; 4059 nis = nisdofs + nisneu; 4060 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4061 /* dofs splitting */ 4062 for (i=0;i<nisdofs;i++) { 4063 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4064 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4065 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4066 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4067 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4068 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4069 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4070 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4071 } 4072 /* neumann boundaries */ 4073 if (pcbddc->NeumannBoundariesLocal) { 4074 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4075 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4076 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4077 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4078 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4079 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4080 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4081 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4082 } 4083 /* free memory */ 4084 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4085 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4086 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4087 } else { 4088 nis = 0; 4089 nisdofs = 0; 4090 nisneu = 0; 4091 isarray = NULL; 4092 } 4093 /* destroy no longer needed map */ 4094 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4095 4096 /* restrict on coarse candidates (if needed) */ 4097 coarse_mat_is = NULL; 4098 if (csin) { 4099 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4100 if (redist) { 4101 PetscMPIInt rank; 4102 PetscInt spc,n_spc_p1,dest[1],destsize; 4103 4104 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4105 spc = active_procs/ncoarse; 4106 n_spc_p1 = active_procs%ncoarse; 4107 if (im_active) { 4108 destsize = 1; 4109 if (rank > n_spc_p1*(spc+1)-1) { 4110 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4111 } else { 4112 dest[0] = rank/(spc+1); 4113 } 4114 } else { 4115 destsize = 0; 4116 } 4117 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4118 } else if (csin_type_simple) { 4119 PetscMPIInt rank; 4120 PetscInt issize,isidx; 4121 4122 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4123 if (im_active) { 4124 issize = 1; 4125 isidx = (PetscInt)rank; 4126 } else { 4127 issize = 0; 4128 isidx = -1; 4129 } 4130 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4131 } else { /* get a suitable subassembling pattern from MATIS code */ 4132 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4133 } 4134 4135 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4136 if (!redist || ncoarse <= void_procs) { 4137 PetscInt ncoarse_cand,tissize,*nisindices; 4138 PetscInt *coarse_candidates; 4139 const PetscInt* tisindices; 4140 4141 /* get coarse candidates' ranks in pc communicator */ 4142 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4143 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4144 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4145 if (!coarse_candidates[i]) { 4146 coarse_candidates[ncoarse_cand++]=i; 4147 } 4148 } 4149 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4150 4151 4152 if (pcbddc->dbg_flag) { 4153 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4155 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4156 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4157 for (i=0;i<ncoarse_cand;i++) { 4158 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4159 } 4160 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4161 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4162 } 4163 /* shift the pattern on coarse candidates */ 4164 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4165 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4166 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4167 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4168 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4169 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4170 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4171 } 4172 if (pcbddc->dbg_flag) { 4173 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4174 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4175 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4176 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4177 } 4178 } 4179 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4180 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4181 } else { 4182 if (pcbddc->dbg_flag) { 4183 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4184 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4185 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4186 } 4187 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4188 coarse_mat_is = t_coarse_mat_is; 4189 } 4190 4191 /* create local to global scatters for coarse problem */ 4192 if (compute_vecs) { 4193 PetscInt lrows; 4194 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4195 if (coarse_mat_is) { 4196 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4197 } else { 4198 lrows = 0; 4199 } 4200 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4201 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4202 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4203 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4204 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4205 } 4206 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4207 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4208 4209 /* set defaults for coarse KSP and PC */ 4210 if (multilevel_allowed) { 4211 coarse_ksp_type = KSPRICHARDSON; 4212 coarse_pc_type = PCBDDC; 4213 } else { 4214 coarse_ksp_type = KSPPREONLY; 4215 coarse_pc_type = PCREDUNDANT; 4216 } 4217 4218 /* print some info if requested */ 4219 if (pcbddc->dbg_flag) { 4220 if (!multilevel_allowed) { 4221 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4222 if (multilevel_requested) { 4223 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4224 } else if (pcbddc->max_levels) { 4225 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4226 } 4227 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4228 } 4229 } 4230 4231 /* create the coarse KSP object only once with defaults */ 4232 if (coarse_mat_is) { 4233 MatReuse coarse_mat_reuse; 4234 PetscViewer dbg_viewer = NULL; 4235 if (pcbddc->dbg_flag) { 4236 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4237 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4238 } 4239 if (!pcbddc->coarse_ksp) { 4240 char prefix[256],str_level[16]; 4241 size_t len; 4242 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4243 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4244 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4245 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4246 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4247 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4248 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4249 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4250 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4251 /* prefix */ 4252 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4253 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4254 if (!pcbddc->current_level) { 4255 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4256 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4257 } else { 4258 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4259 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4260 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4261 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4262 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4263 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4264 } 4265 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4266 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4267 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4268 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4269 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4270 /* allow user customization */ 4271 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4272 } 4273 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4274 if (nisdofs) { 4275 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4276 for (i=0;i<nisdofs;i++) { 4277 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4278 } 4279 } 4280 if (nisneu) { 4281 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4282 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4283 } 4284 4285 /* get some info after set from options */ 4286 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4287 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4288 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4289 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4290 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4291 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4292 isbddc = PETSC_FALSE; 4293 } 4294 if (isredundant) { 4295 KSP inner_ksp; 4296 PC inner_pc; 4297 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4298 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4299 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4300 } 4301 4302 /* assemble coarse matrix */ 4303 if (coarse_reuse) { 4304 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4305 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4306 coarse_mat_reuse = MAT_REUSE_MATRIX; 4307 } else { 4308 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4309 } 4310 if (isbddc || isnn) { 4311 if (pcbddc->coarsening_ratio > 1) { 4312 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4313 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4314 if (pcbddc->dbg_flag) { 4315 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4316 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4317 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4318 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4319 } 4320 } 4321 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4322 } else { 4323 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4324 coarse_mat = coarse_mat_is; 4325 } 4326 } else { 4327 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4328 } 4329 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4330 4331 /* propagate symmetry info to coarse matrix */ 4332 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4333 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4334 4335 /* set operators */ 4336 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4337 if (pcbddc->dbg_flag) { 4338 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4339 } 4340 } else { /* processes non partecipating to coarse solver (if any) */ 4341 coarse_mat = 0; 4342 } 4343 ierr = PetscFree(isarray);CHKERRQ(ierr); 4344 #if 0 4345 { 4346 PetscViewer viewer; 4347 char filename[256]; 4348 sprintf(filename,"coarse_mat.m"); 4349 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4350 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4351 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4352 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4353 } 4354 #endif 4355 4356 /* Compute coarse null space (special handling by BDDC only) */ 4357 if (pcbddc->NullSpace) { 4358 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4359 } 4360 4361 if (pcbddc->coarse_ksp) { 4362 Vec crhs,csol; 4363 PetscBool ispreonly; 4364 if (CoarseNullSpace) { 4365 if (isbddc) { 4366 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4367 } else { 4368 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4369 } 4370 } 4371 /* setup coarse ksp */ 4372 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4373 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4374 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4375 /* hack */ 4376 if (!csol) { 4377 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4378 } 4379 if (!crhs) { 4380 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4381 } 4382 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4383 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4384 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4385 KSP check_ksp; 4386 KSPType check_ksp_type; 4387 PC check_pc; 4388 Vec check_vec,coarse_vec; 4389 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4390 PetscInt its; 4391 PetscBool compute_eigs; 4392 PetscReal *eigs_r,*eigs_c; 4393 PetscInt neigs; 4394 const char *prefix; 4395 4396 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4397 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4398 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4399 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4400 if (ispreonly) { 4401 check_ksp_type = KSPPREONLY; 4402 compute_eigs = PETSC_FALSE; 4403 } else { 4404 check_ksp_type = KSPGMRES; 4405 compute_eigs = PETSC_TRUE; 4406 } 4407 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4408 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4409 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4410 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4411 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4412 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4413 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4414 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4415 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4416 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4417 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4418 /* create random vec */ 4419 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4420 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4421 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4422 if (CoarseNullSpace) { 4423 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4424 } 4425 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4426 /* solve coarse problem */ 4427 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4428 if (CoarseNullSpace) { 4429 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4430 } 4431 /* set eigenvalue estimation if preonly has not been requested */ 4432 if (compute_eigs) { 4433 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4434 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4435 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4436 lambda_max = eigs_r[neigs-1]; 4437 lambda_min = eigs_r[0]; 4438 if (pcbddc->use_coarse_estimates) { 4439 if (lambda_max>lambda_min) { 4440 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4441 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4442 } 4443 } 4444 } 4445 4446 /* check coarse problem residual error */ 4447 if (pcbddc->dbg_flag) { 4448 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4449 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4450 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4451 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4452 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4453 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4454 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4455 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4456 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4457 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4458 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4459 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4460 if (compute_eigs) { 4461 PetscReal lambda_max_s,lambda_min_s; 4462 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4463 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4464 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4465 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4466 for (i=0;i<neigs;i++) { 4467 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4468 } 4469 } 4470 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4471 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4472 } 4473 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4474 if (compute_eigs) { 4475 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4476 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4477 } 4478 } 4479 } 4480 /* print additional info */ 4481 if (pcbddc->dbg_flag) { 4482 /* waits until all processes reaches this point */ 4483 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4484 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4485 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4486 } 4487 4488 /* free memory */ 4489 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4490 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4491 PetscFunctionReturn(0); 4492 } 4493 4494 #undef __FUNCT__ 4495 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4496 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4497 { 4498 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4499 PC_IS* pcis = (PC_IS*)pc->data; 4500 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4501 PetscInt i,local_size,coarse_size=0; 4502 PetscInt *local_primal_indices=NULL; 4503 PetscInt *t_local_primal_indices=NULL; 4504 PetscErrorCode ierr; 4505 4506 PetscFunctionBegin; 4507 /* Compute global number of coarse dofs */ 4508 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4509 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4510 } 4511 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr); 4512 ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr); 4513 local_size = 0; 4514 for (i=0;i<pcbddc->local_primal_size_cc;i++) { 4515 PetscInt j; 4516 for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j; 4517 } 4518 ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr); 4519 if (local_size != pcbddc->local_primal_size) { 4520 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4521 } 4522 4523 /* check numbering */ 4524 if (pcbddc->dbg_flag) { 4525 PetscScalar coarsesum,*array; 4526 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4527 4528 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4529 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4530 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4531 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4532 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4533 for (i=0;i<pcbddc->local_primal_size;i++) { 4534 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4535 } 4536 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4537 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4538 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4539 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4540 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4541 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4542 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4543 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4544 for (i=0;i<pcis->n;i++) { 4545 if (array[i] == 1.0) { 4546 set_error = PETSC_TRUE; 4547 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4548 } 4549 } 4550 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4551 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4552 for (i=0;i<pcis->n;i++) { 4553 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4554 } 4555 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4556 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4557 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4558 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4559 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4560 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4561 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4562 PetscInt *gidxs; 4563 4564 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4565 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4566 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4567 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4569 for (i=0;i<pcbddc->local_primal_size;i++) { 4570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4571 } 4572 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4573 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4574 } 4575 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4576 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4577 } 4578 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4579 /* get back data */ 4580 *coarse_size_n = coarse_size; 4581 *local_primal_indices_n = local_primal_indices; 4582 PetscFunctionReturn(0); 4583 } 4584 4585 #undef __FUNCT__ 4586 #define __FUNCT__ "PCBDDCGlobalToLocal" 4587 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4588 { 4589 IS localis_t; 4590 PetscInt i,lsize,*idxs,n; 4591 PetscScalar *vals; 4592 PetscErrorCode ierr; 4593 4594 PetscFunctionBegin; 4595 /* get indices in local ordering exploiting local to global map */ 4596 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4597 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4598 for (i=0;i<lsize;i++) vals[i] = 1.0; 4599 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4600 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4601 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4602 if (idxs) { /* multilevel guard */ 4603 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4604 } 4605 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4606 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4607 ierr = PetscFree(vals);CHKERRQ(ierr); 4608 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4609 /* now compute set in local ordering */ 4610 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4611 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4612 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4613 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4614 for (i=0,lsize=0;i<n;i++) { 4615 if (PetscRealPart(vals[i]) > 0.5) { 4616 lsize++; 4617 } 4618 } 4619 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4620 for (i=0,lsize=0;i<n;i++) { 4621 if (PetscRealPart(vals[i]) > 0.5) { 4622 idxs[lsize++] = i; 4623 } 4624 } 4625 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4626 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4627 *localis = localis_t; 4628 PetscFunctionReturn(0); 4629 } 4630 4631 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4632 #undef __FUNCT__ 4633 #define __FUNCT__ "PCBDDCMatMult_Private" 4634 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4635 { 4636 PCBDDCChange_ctx change_ctx; 4637 PetscErrorCode ierr; 4638 4639 PetscFunctionBegin; 4640 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4641 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4642 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4643 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4644 PetscFunctionReturn(0); 4645 } 4646 4647 #undef __FUNCT__ 4648 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4649 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4650 { 4651 PCBDDCChange_ctx change_ctx; 4652 PetscErrorCode ierr; 4653 4654 PetscFunctionBegin; 4655 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4656 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4657 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4658 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4659 PetscFunctionReturn(0); 4660 } 4661 4662 #undef __FUNCT__ 4663 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4664 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4665 { 4666 PC_IS *pcis=(PC_IS*)pc->data; 4667 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4668 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4669 Mat S_j; 4670 PetscInt *used_xadj,*used_adjncy; 4671 PetscBool free_used_adj; 4672 PetscErrorCode ierr; 4673 4674 PetscFunctionBegin; 4675 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4676 free_used_adj = PETSC_FALSE; 4677 if (pcbddc->sub_schurs_layers == -1) { 4678 used_xadj = NULL; 4679 used_adjncy = NULL; 4680 } else { 4681 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4682 used_xadj = pcbddc->mat_graph->xadj; 4683 used_adjncy = pcbddc->mat_graph->adjncy; 4684 } else if (pcbddc->computed_rowadj) { 4685 used_xadj = pcbddc->mat_graph->xadj; 4686 used_adjncy = pcbddc->mat_graph->adjncy; 4687 } else { 4688 PetscBool flg_row=PETSC_FALSE; 4689 const PetscInt *xadj,*adjncy; 4690 PetscInt nvtxs; 4691 4692 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4693 if (flg_row) { 4694 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4695 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4696 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4697 free_used_adj = PETSC_TRUE; 4698 } else { 4699 pcbddc->sub_schurs_layers = -1; 4700 used_xadj = NULL; 4701 used_adjncy = NULL; 4702 } 4703 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4704 } 4705 } 4706 4707 /* setup sub_schurs data */ 4708 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4709 if (!sub_schurs->use_mumps) { 4710 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4711 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4712 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4713 } else { 4714 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4715 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4716 } 4717 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4718 4719 /* free adjacency */ 4720 if (free_used_adj) { 4721 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4722 } 4723 PetscFunctionReturn(0); 4724 } 4725 4726 #undef __FUNCT__ 4727 #define __FUNCT__ "PCBDDCInitSubSchurs" 4728 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4729 { 4730 PC_IS *pcis=(PC_IS*)pc->data; 4731 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4732 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4733 PCBDDCGraph graph; 4734 PetscErrorCode ierr; 4735 4736 PetscFunctionBegin; 4737 /* attach interface graph for determining subsets */ 4738 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4739 IS verticesIS; 4740 4741 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4742 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4743 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4744 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4745 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4746 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4747 /* 4748 if (pcbddc->dbg_flag) { 4749 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4750 } 4751 */ 4752 } else { 4753 graph = pcbddc->mat_graph; 4754 } 4755 4756 /* sub_schurs init */ 4757 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4758 4759 /* free graph struct */ 4760 if (pcbddc->sub_schurs_rebuild) { 4761 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4762 } 4763 PetscFunctionReturn(0); 4764 } 4765