1e0eea495SMark #if !defined(PETSCLANDAU_H) 2e0eea495SMark #define PETSCLANDAU_H 3e0eea495SMark 4e0eea495SMark #include <petscdmplex.h> /*I "petscdmplex.h" I*/ 5e0eea495SMark #include <petscts.h> 6e0eea495SMark 7*8594ddcfSMark Adams PETSC_EXTERN PetscErrorCode DMPlexLandauPrintNorms(Vec, PetscInt); 8*8594ddcfSMark Adams PETSC_EXTERN PetscErrorCode DMPlexLandauCreateVelocitySpace(MPI_Comm,PetscInt,const char[],Vec*,Mat*,DM*); 9*8594ddcfSMark Adams PETSC_EXTERN PetscErrorCode DMPlexLandauDestroyVelocitySpace(DM*); 10*8594ddcfSMark Adams PETSC_EXTERN PetscErrorCode DMPlexLandauAddMaxwellians(DM, Vec, PetscReal, PetscReal[], PetscReal[], PetscInt, PetscInt, void *); 11*8594ddcfSMark Adams PETSC_EXTERN PetscErrorCode DMPlexLandauCreateMassMatrix(DM dm, Mat *Amat); 12*8594ddcfSMark Adams PETSC_EXTERN PetscErrorCode DMPlexLandauIFunction(TS, PetscReal,Vec,Vec,Vec,void *); 13*8594ddcfSMark Adams PETSC_EXTERN PetscErrorCode DMPlexLandauIJacobian(TS, PetscReal,Vec,Vec,PetscReal,Mat,Mat,void *); 14e0eea495SMark 15bfc784b7SMark Adams typedef int LandauIdx; 16bfc784b7SMark Adams 17e0eea495SMark /* the Fokker-Planck-Landau context */ 18e0eea495SMark #if !defined(LANDAU_DIM) 19e0eea495SMark #define LANDAU_DIM 2 20e0eea495SMark #endif 21a587d139SMark 22e0eea495SMark #if !defined(LANDAU_MAX_SPECIES) 2352cdd6eaSMark #if LANDAU_DIM==2 24e0eea495SMark #define LANDAU_MAX_SPECIES 10 258a6f2e61SMark Adams #define LANDAU_MAX_GRIDS 3 2652cdd6eaSMark #else 2752cdd6eaSMark #define LANDAU_MAX_SPECIES 3 288a6f2e61SMark Adams #define LANDAU_MAX_GRIDS 2 2952cdd6eaSMark #endif 308a6f2e61SMark Adams #else 318a6f2e61SMark Adams #define LANDAU_MAX_GRIDS 3 32e0eea495SMark #endif 33a587d139SMark 34f37ccb5fSMark Adams #define LANDAU_MAX_BATCH_SZ 320 358fdabdddSMark Adams 36a587d139SMark #if !defined(LANDAU_MAX_Q) 37a587d139SMark #if defined(LANDAU_MAX_NQ) 38a587d139SMark #error"LANDAU_MAX_NQ but not LANDAU_MAX_Q. Use -DLANDAU_MAX_Q=4 for Q3 elements" 39a587d139SMark #endif 4052cdd6eaSMark #if LANDAU_DIM==2 41a587d139SMark #define LANDAU_MAX_Q 5 42e0eea495SMark #else 43a587d139SMark #define LANDAU_MAX_Q 3 44a587d139SMark #endif 45a587d139SMark #else 46a587d139SMark #undef LANDAU_MAX_NQ 47e0eea495SMark #endif 48a587d139SMark 49a587d139SMark #if LANDAU_DIM==2 50930e68a5SMark Adams #define LANDAU_MAX_Q_FACE LANDAU_MAX_Q 51a587d139SMark #define LANDAU_MAX_NQ (LANDAU_MAX_Q*LANDAU_MAX_Q) 52a587d139SMark #else 53930e68a5SMark Adams #define LANDAU_MAX_Q_FACE (LANDAU_MAX_Q*LANDAU_MAX_Q) 54a587d139SMark #define LANDAU_MAX_NQ (LANDAU_MAX_Q*LANDAU_MAX_Q*LANDAU_MAX_Q) 55a587d139SMark #endif 56a587d139SMark 57e0eea495SMark typedef enum {LANDAU_CUDA, LANDAU_KOKKOS, LANDAU_CPU} LandauDeviceType; 58e8d2b73aSMark Adams 598fdabdddSMark Adams // static data - will be "device" data 60e8d2b73aSMark Adams typedef struct { 61e8d2b73aSMark Adams void *invJ; // nip*dim*dim 62e8d2b73aSMark Adams void *D; // nq*nb*dim 63e8d2b73aSMark Adams void *B; // nq*nb 64e8d2b73aSMark Adams void *alpha; // ns 65e8d2b73aSMark Adams void *beta; // ns 66e8d2b73aSMark Adams void *invMass; // ns 67e8d2b73aSMark Adams void *w; // nip 68e8d2b73aSMark Adams void *x; // nip 69e8d2b73aSMark Adams void *y; // nip 70e8d2b73aSMark Adams void *z; // nip 71e8d2b73aSMark Adams void *Eq_m; // ns - dynamic 72e8d2b73aSMark Adams void *f; // nip*Nf - dynamic (IP) 73e8d2b73aSMark Adams void *dfdx; // nip*Nf - dynamic (IP) 74e8d2b73aSMark Adams void *dfdy; // nip*Nf - dynamic (IP) 75e8d2b73aSMark Adams void *dfdz; // nip*Nf - dynamic (IP) 76e8d2b73aSMark Adams int dim_,ns_,nip_,nq_,nb_; 778fdabdddSMark Adams void *NCells; // remove and ise elem_offset - TODO 788fdabdddSMark Adams void *species_offset; // for each grid, but same for all batched vertices 798fdabdddSMark Adams void *mat_offset; // for each grid, but same for all batched vertices 808fdabdddSMark Adams void *elem_offset; // for each grid, but same for all batched vertices 818fdabdddSMark Adams void *ip_offset; // for each grid, but same for all batched vertices 828fdabdddSMark Adams void *ipf_offset; // for each grid, but same for all batched vertices 838fdabdddSMark Adams void *ipfdf_data; // for each grid, but same for all batched vertices 848fdabdddSMark Adams void *maps; // for each grid, but same for all batched vertices 85bfc784b7SMark Adams // COO 86bfc784b7SMark Adams LandauIdx *coo_elem_offsets; 87bfc784b7SMark Adams LandauIdx (*coo_elem_point_offsets)[LANDAU_MAX_NQ+1]; 88bfc784b7SMark Adams LandauIdx *coo_elem_fullNb; 89bfc784b7SMark Adams LandauIdx n_coo_cellsTot; 90bfc784b7SMark Adams LandauIdx coo_size; 9113241b68SMark Adams LandauIdx coo_max_fullnb; 928a6f2e61SMark Adams } LandauStaticData; 93e8d2b73aSMark Adams 948fdabdddSMark Adams typedef enum {LANDAU_EX2_TSSOLVE, LANDAU_MATRIX_TOTAL, LANDAU_OPERATOR, LANDAU_JACOBIAN_COUNT, LANDAU_JACOBIAN, LANDAU_MASS, LANDAU_F_DF, LANDAU_KERNEL, KSP_FACTOR, KSP_SOLVE, LANDAU_NUM_TIMERS} LandauOMPTimers; 958fdabdddSMark Adams 96e0eea495SMark typedef struct { 97e0eea495SMark PetscBool interpolate; /* Generate intermediate mesh elements */ 98a587d139SMark PetscBool gpu_assembly; 998a6f2e61SMark Adams MPI_Comm comm; /* global communicator to use for errors and diagnostics */ 1008fdabdddSMark Adams double times[LANDAU_NUM_TIMERS]; 1018a6f2e61SMark Adams PetscBool use_matrix_mass; 1028a6f2e61SMark Adams /* FE */ 103e0eea495SMark PetscFE fe[LANDAU_MAX_SPECIES]; 104e0eea495SMark /* geometry */ 1058a6f2e61SMark Adams PetscReal radius[LANDAU_MAX_GRIDS]; 1068a6f2e61SMark Adams PetscReal re_radius; /* RE: radius of refinement along v_perp=0, z>0 */ 1078a6f2e61SMark Adams PetscReal vperp0_radius1; /* RE: radius of refinement along v_perp=0 */ 1088a6f2e61SMark Adams PetscReal vperp0_radius2; /* RE: radius of refinement along v_perp=0 after origin AMR refinement */ 1098a6f2e61SMark Adams PetscBool sphere; // not used 1108a6f2e61SMark Adams PetscBool inflate; // not used 1118a6f2e61SMark Adams PetscReal i_radius[LANDAU_MAX_GRIDS]; // not used 1128a6f2e61SMark Adams PetscReal e_radius; // not used 1138a6f2e61SMark Adams PetscInt num_sections; // not used 1148a6f2e61SMark Adams /* AMR */ 1158a6f2e61SMark Adams PetscBool use_p4est; 1168a6f2e61SMark Adams PetscInt numRERefine; /* RE: refinement along v_perp=0, z > 0 */ 1178a6f2e61SMark Adams PetscInt nZRefine1; /* RE: origin refinement after v_perp=0 refinement */ 1188a6f2e61SMark Adams PetscInt nZRefine2; /* RE: origin refinement after origin AMR refinement */ 1198a6f2e61SMark Adams PetscInt numAMRRefine[LANDAU_MAX_GRIDS]; /* normal AMR - refine from origin */ 1208a6f2e61SMark Adams PetscInt postAMRRefine[LANDAU_MAX_GRIDS]; /* uniform refinement of AMR */ 1218a6f2e61SMark Adams /* relativistic */ 122cefb98e8SMark Adams PetscBool use_energy_tensor_trick; 123cefb98e8SMark Adams PetscBool use_relativistic_corrections; 124e0eea495SMark /* physics */ 125e0eea495SMark PetscReal thermal_temps[LANDAU_MAX_SPECIES]; 126e0eea495SMark PetscReal masses[LANDAU_MAX_SPECIES]; /* mass of each species */ 127e0eea495SMark PetscReal charges[LANDAU_MAX_SPECIES]; /* charge of each species */ 128e0eea495SMark PetscReal n[LANDAU_MAX_SPECIES]; /* number density of each species */ 129e0eea495SMark PetscReal m_0; /* reference mass */ 130e0eea495SMark PetscReal v_0; /* reference velocity */ 131e0eea495SMark PetscReal n_0; /* reference number density */ 132e0eea495SMark PetscReal t_0; /* reference time */ 133e0eea495SMark PetscReal Ez; 134e0eea495SMark PetscReal epsilon0; 135e0eea495SMark PetscReal k; 136e0eea495SMark PetscReal lnLam; 1378a6f2e61SMark Adams PetscReal electronShift; 138e0eea495SMark PetscInt num_species; 1398a6f2e61SMark Adams PetscInt num_grids; 1408fdabdddSMark Adams PetscInt species_offset[LANDAU_MAX_GRIDS+1]; // for each grid, but same for all batched vertices 1418fdabdddSMark Adams PetscInt mat_offset[LANDAU_MAX_GRIDS+1]; // for each grid, but same for all batched vertices 142cb25d741SMark Adams // batching 143cb25d741SMark Adams PetscBool jacobian_field_major_order; // this could be a type but lets not get pedantic 144cb25d741SMark Adams VecScatter plex_batch; 145cb25d741SMark Adams Vec work_vec; 146cb25d741SMark Adams IS batch_is; 147cb25d741SMark Adams PetscErrorCode (*seqaij_mult)(Mat,Vec,Vec); 148cb25d741SMark Adams PetscErrorCode (*seqaij_multtranspose)(Mat,Vec,Vec); 149cb25d741SMark Adams PetscErrorCode (*seqaij_solve)(Mat,Vec,Vec); 150cb25d741SMark Adams PetscErrorCode (*seqaij_getdiagonal)(Mat,Vec); 151bfc784b7SMark Adams /* COO */ 152aab769cbSMark Adams PetscBool coo_assembly; 153e0eea495SMark /* cache */ 154e0eea495SMark Mat J; 155e0eea495SMark Mat M; 156e0eea495SMark Vec X; 157e0eea495SMark /* derived type */ 158e0eea495SMark void *data; 159e0eea495SMark /* computing */ 160e0eea495SMark LandauDeviceType deviceType; 1618a6f2e61SMark Adams DM pack; 1628a6f2e61SMark Adams DM plex[LANDAU_MAX_GRIDS]; 1638fdabdddSMark Adams LandauStaticData SData_d; /* static geometric data on device */ 16454545eeeSMark Adams /* diagnostics */ 16554545eeeSMark Adams PetscInt verbose; 16654545eeeSMark Adams PetscLogEvent events[20]; 167c751c0a2SMark Adams PetscLogStage stage; 1688fdabdddSMark Adams PetscBool aux_bool; /* helper */ 1698fdabdddSMark Adams PetscInt batch_sz; 1708fdabdddSMark Adams PetscInt batch_view_idx; 171e0eea495SMark } LandauCtx; 172e0eea495SMark 1738fdabdddSMark Adams #define LANDAU_SPECIES_MAJOR 1748fdabdddSMark Adams #if !defined(LANDAU_SPECIES_MAJOR) 1758fdabdddSMark Adams #define LAND_PACK_IDX(_b,_g) (_b*ctx->num_grids + _g) 1768fdabdddSMark Adams #define LAND_MOFFSET(_b,_g,_nbch,_ngrid,_mat_off) (_b*_mat_off[_ngrid] + _mat_off[_g]) 1778fdabdddSMark Adams #else 1788fdabdddSMark Adams #define LAND_PACK_IDX(_b,_g) (_g*ctx->batch_sz + _b) 1798fdabdddSMark Adams #define LAND_MOFFSET(_b,_g,_nbch,_ngrid,_mat_off) (_nbch*_mat_off[_g] + _b*(_mat_off[_g+1] - _mat_off[_g])) 1808fdabdddSMark Adams #endif 1818fdabdddSMark Adams 182a587d139SMark typedef struct { 183a587d139SMark PetscReal scale; 184bfc784b7SMark Adams LandauIdx gid; // Landau matrix index (<10,000) 185a587d139SMark } pointInterpolationP4est; 186a587d139SMark typedef struct _lP4estVertexMaps { 187a587d139SMark LandauIdx (*gIdx)[LANDAU_MAX_SPECIES][LANDAU_MAX_NQ]; // #elems * LANDAU_MAX_NQ (spoof for max , Nb) on device, 188a587d139SMark LandauIdx num_elements; 189a587d139SMark LandauIdx num_reduced; 190a587d139SMark LandauIdx num_face; // (Q or Q^2 for 3D) 191a587d139SMark LandauDeviceType deviceType; 192a587d139SMark PetscInt Nf; 193a587d139SMark pointInterpolationP4est (*c_maps)[LANDAU_MAX_Q_FACE]; 1948a6f2e61SMark Adams struct _lP4estVertexMaps*d_self; 195a587d139SMark void *vp1,*vp2,*vp3; 1968a6f2e61SMark Adams PetscInt numgrids; 197a587d139SMark } P4estVertexMaps; 198a587d139SMark 199e0eea495SMark PETSC_EXTERN PetscErrorCode LandauCreateColoring(Mat, DM, PetscContainer *); 200e0eea495SMark #if defined(PETSC_HAVE_CUDA) 201a82411e9SMark Adams PETSC_EXTERN PetscErrorCode LandauCUDAJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscScalar[], 202d2319daeSMark Adams const LandauStaticData *, const PetscReal, const PetscLogEvent[],const PetscInt[], const PetscInt[], Mat[], Mat); 2038a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauCUDACreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt); 2048a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauCUDADestroyMatMaps(P4estVertexMaps *, PetscInt); 2058fdabdddSMark Adams PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal [], PetscReal [], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], 2068a6f2e61SMark Adams PetscReal[], LandauStaticData *); 2078a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataClear(LandauStaticData *); 208e0eea495SMark #endif 209e0eea495SMark #if defined(PETSC_HAVE_KOKKOS) 210a82411e9SMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscScalar[], 211d2319daeSMark Adams const LandauStaticData *, const PetscReal, const PetscLogEvent[], const PetscInt[], const PetscInt[], Mat[], Mat); 2128a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosCreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt); 2138a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosDestroyMatMaps(P4estVertexMaps *, PetscInt); 2148fdabdddSMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal [], PetscReal [], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], 2158a6f2e61SMark Adams PetscReal[], LandauStaticData *); 2168a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataClear(LandauStaticData*); 217e0eea495SMark #endif 218e0eea495SMark 219e0eea495SMark #endif /* PETSCLANDAU_H */ 220