xref: /petsc/include/petsclandau.h (revision 8594ddcf839049ebf2aedeadda7d1e3dee42d2b6)
1e0eea495SMark #if !defined(PETSCLANDAU_H)
2e0eea495SMark #define PETSCLANDAU_H
3e0eea495SMark 
4e0eea495SMark #include <petscdmplex.h> /*I      "petscdmplex.h"    I*/
5e0eea495SMark #include <petscts.h>
6e0eea495SMark 
7*8594ddcfSMark Adams PETSC_EXTERN PetscErrorCode DMPlexLandauPrintNorms(Vec, PetscInt);
8*8594ddcfSMark Adams PETSC_EXTERN PetscErrorCode DMPlexLandauCreateVelocitySpace(MPI_Comm,PetscInt,const char[],Vec*,Mat*,DM*);
9*8594ddcfSMark Adams PETSC_EXTERN PetscErrorCode DMPlexLandauDestroyVelocitySpace(DM*);
10*8594ddcfSMark Adams PETSC_EXTERN PetscErrorCode DMPlexLandauAddMaxwellians(DM, Vec, PetscReal, PetscReal[], PetscReal[], PetscInt, PetscInt, void *);
11*8594ddcfSMark Adams PETSC_EXTERN PetscErrorCode DMPlexLandauCreateMassMatrix(DM dm, Mat *Amat);
12*8594ddcfSMark Adams PETSC_EXTERN PetscErrorCode DMPlexLandauIFunction(TS, PetscReal,Vec,Vec,Vec,void *);
13*8594ddcfSMark Adams PETSC_EXTERN PetscErrorCode DMPlexLandauIJacobian(TS, PetscReal,Vec,Vec,PetscReal,Mat,Mat,void *);
14e0eea495SMark 
15bfc784b7SMark Adams typedef int LandauIdx;
16bfc784b7SMark Adams 
17e0eea495SMark /* the Fokker-Planck-Landau context */
18e0eea495SMark #if !defined(LANDAU_DIM)
19e0eea495SMark #define LANDAU_DIM 2
20e0eea495SMark #endif
21a587d139SMark 
22e0eea495SMark #if !defined(LANDAU_MAX_SPECIES)
2352cdd6eaSMark #if LANDAU_DIM==2
24e0eea495SMark #define LANDAU_MAX_SPECIES 10
258a6f2e61SMark Adams #define LANDAU_MAX_GRIDS 3
2652cdd6eaSMark #else
2752cdd6eaSMark #define LANDAU_MAX_SPECIES 3
288a6f2e61SMark Adams #define LANDAU_MAX_GRIDS 2
2952cdd6eaSMark #endif
308a6f2e61SMark Adams #else
318a6f2e61SMark Adams #define LANDAU_MAX_GRIDS 3
32e0eea495SMark #endif
33a587d139SMark 
34f37ccb5fSMark Adams #define LANDAU_MAX_BATCH_SZ 320
358fdabdddSMark Adams 
36a587d139SMark #if !defined(LANDAU_MAX_Q)
37a587d139SMark #if defined(LANDAU_MAX_NQ)
38a587d139SMark #error"LANDAU_MAX_NQ but not LANDAU_MAX_Q. Use -DLANDAU_MAX_Q=4 for Q3 elements"
39a587d139SMark #endif
4052cdd6eaSMark #if LANDAU_DIM==2
41a587d139SMark #define LANDAU_MAX_Q 5
42e0eea495SMark #else
43a587d139SMark #define LANDAU_MAX_Q 3
44a587d139SMark #endif
45a587d139SMark #else
46a587d139SMark #undef LANDAU_MAX_NQ
47e0eea495SMark #endif
48a587d139SMark 
49a587d139SMark #if LANDAU_DIM==2
50930e68a5SMark Adams #define LANDAU_MAX_Q_FACE LANDAU_MAX_Q
51a587d139SMark #define LANDAU_MAX_NQ (LANDAU_MAX_Q*LANDAU_MAX_Q)
52a587d139SMark #else
53930e68a5SMark Adams #define LANDAU_MAX_Q_FACE (LANDAU_MAX_Q*LANDAU_MAX_Q)
54a587d139SMark #define LANDAU_MAX_NQ (LANDAU_MAX_Q*LANDAU_MAX_Q*LANDAU_MAX_Q)
55a587d139SMark #endif
56a587d139SMark 
57e0eea495SMark typedef enum {LANDAU_CUDA, LANDAU_KOKKOS, LANDAU_CPU} LandauDeviceType;
58e8d2b73aSMark Adams 
598fdabdddSMark Adams // static data - will be "device" data
60e8d2b73aSMark Adams typedef struct {
61e8d2b73aSMark Adams   void  *invJ;  // nip*dim*dim
62e8d2b73aSMark Adams   void  *D;     // nq*nb*dim
63e8d2b73aSMark Adams   void  *B;     // nq*nb
64e8d2b73aSMark Adams   void  *alpha; // ns
65e8d2b73aSMark Adams   void  *beta;  // ns
66e8d2b73aSMark Adams   void  *invMass; // ns
67e8d2b73aSMark Adams   void  *w; // nip
68e8d2b73aSMark Adams   void  *x; // nip
69e8d2b73aSMark Adams   void  *y; // nip
70e8d2b73aSMark Adams   void  *z; // nip
71e8d2b73aSMark Adams   void  *Eq_m; // ns - dynamic
72e8d2b73aSMark Adams   void  *f; //  nip*Nf - dynamic (IP)
73e8d2b73aSMark Adams   void  *dfdx; // nip*Nf - dynamic (IP)
74e8d2b73aSMark Adams   void  *dfdy; // nip*Nf - dynamic (IP)
75e8d2b73aSMark Adams   void  *dfdz; // nip*Nf - dynamic (IP)
76e8d2b73aSMark Adams   int   dim_,ns_,nip_,nq_,nb_;
778fdabdddSMark Adams   void  *NCells; // remove and ise elem_offset - TODO
788fdabdddSMark Adams   void  *species_offset; // for each grid, but same for all batched vertices
798fdabdddSMark Adams   void  *mat_offset; // for each grid, but same for all batched vertices
808fdabdddSMark Adams   void  *elem_offset; // for each grid, but same for all batched vertices
818fdabdddSMark Adams   void  *ip_offset; // for each grid, but same for all batched vertices
828fdabdddSMark Adams   void  *ipf_offset; // for each grid, but same for all batched vertices
838fdabdddSMark Adams   void  *ipfdf_data; // for each grid, but same for all batched vertices
848fdabdddSMark Adams   void  *maps; // for each grid, but same for all batched vertices
85bfc784b7SMark Adams   // COO
86bfc784b7SMark Adams   LandauIdx        *coo_elem_offsets;
87bfc784b7SMark Adams   LandauIdx        (*coo_elem_point_offsets)[LANDAU_MAX_NQ+1];
88bfc784b7SMark Adams   LandauIdx        *coo_elem_fullNb;
89bfc784b7SMark Adams   LandauIdx        n_coo_cellsTot;
90bfc784b7SMark Adams   LandauIdx        coo_size;
9113241b68SMark Adams   LandauIdx        coo_max_fullnb;
928a6f2e61SMark Adams } LandauStaticData;
93e8d2b73aSMark Adams 
948fdabdddSMark Adams typedef enum {LANDAU_EX2_TSSOLVE, LANDAU_MATRIX_TOTAL, LANDAU_OPERATOR, LANDAU_JACOBIAN_COUNT, LANDAU_JACOBIAN, LANDAU_MASS, LANDAU_F_DF, LANDAU_KERNEL, KSP_FACTOR, KSP_SOLVE, LANDAU_NUM_TIMERS} LandauOMPTimers;
958fdabdddSMark Adams 
96e0eea495SMark typedef struct {
97e0eea495SMark   PetscBool      interpolate;                  /* Generate intermediate mesh elements */
98a587d139SMark   PetscBool      gpu_assembly;
998a6f2e61SMark Adams   MPI_Comm       comm; /* global communicator to use for errors and diagnostics */
1008fdabdddSMark Adams   double         times[LANDAU_NUM_TIMERS];
1018a6f2e61SMark Adams   PetscBool      use_matrix_mass;
1028a6f2e61SMark Adams   /* FE */
103e0eea495SMark   PetscFE        fe[LANDAU_MAX_SPECIES];
104e0eea495SMark   /* geometry  */
1058a6f2e61SMark Adams   PetscReal      radius[LANDAU_MAX_GRIDS];
1068a6f2e61SMark Adams   PetscReal      re_radius;           /* RE: radius of refinement along v_perp=0, z>0 */
1078a6f2e61SMark Adams   PetscReal      vperp0_radius1;      /* RE: radius of refinement along v_perp=0 */
1088a6f2e61SMark Adams   PetscReal      vperp0_radius2;      /* RE: radius of refinement along v_perp=0 after origin AMR refinement */
1098a6f2e61SMark Adams   PetscBool      sphere; // not used
1108a6f2e61SMark Adams   PetscBool      inflate; // not used
1118a6f2e61SMark Adams   PetscReal      i_radius[LANDAU_MAX_GRIDS]; // not used
1128a6f2e61SMark Adams   PetscReal      e_radius; // not used
1138a6f2e61SMark Adams   PetscInt       num_sections; // not used
1148a6f2e61SMark Adams   /* AMR */
1158a6f2e61SMark Adams   PetscBool      use_p4est;
1168a6f2e61SMark Adams   PetscInt       numRERefine;        /* RE: refinement along v_perp=0, z > 0 */
1178a6f2e61SMark Adams   PetscInt       nZRefine1;          /* RE: origin refinement after v_perp=0 refinement */
1188a6f2e61SMark Adams   PetscInt       nZRefine2;          /* RE: origin refinement after origin AMR refinement */
1198a6f2e61SMark Adams   PetscInt       numAMRRefine[LANDAU_MAX_GRIDS];  /* normal AMR - refine from origin */
1208a6f2e61SMark Adams   PetscInt       postAMRRefine[LANDAU_MAX_GRIDS]; /* uniform refinement of AMR */
1218a6f2e61SMark Adams   /* relativistic */
122cefb98e8SMark Adams   PetscBool      use_energy_tensor_trick;
123cefb98e8SMark Adams   PetscBool      use_relativistic_corrections;
124e0eea495SMark   /* physics */
125e0eea495SMark   PetscReal      thermal_temps[LANDAU_MAX_SPECIES];
126e0eea495SMark   PetscReal      masses[LANDAU_MAX_SPECIES];  /* mass of each species  */
127e0eea495SMark   PetscReal      charges[LANDAU_MAX_SPECIES]; /* charge of each species  */
128e0eea495SMark   PetscReal      n[LANDAU_MAX_SPECIES];       /* number density of each species  */
129e0eea495SMark   PetscReal      m_0;      /* reference mass */
130e0eea495SMark   PetscReal      v_0;      /* reference velocity */
131e0eea495SMark   PetscReal      n_0;      /* reference number density */
132e0eea495SMark   PetscReal      t_0;      /* reference time */
133e0eea495SMark   PetscReal      Ez;
134e0eea495SMark   PetscReal      epsilon0;
135e0eea495SMark   PetscReal      k;
136e0eea495SMark   PetscReal      lnLam;
1378a6f2e61SMark Adams   PetscReal      electronShift;
138e0eea495SMark   PetscInt       num_species;
1398a6f2e61SMark Adams   PetscInt       num_grids;
1408fdabdddSMark Adams   PetscInt       species_offset[LANDAU_MAX_GRIDS+1]; // for each grid, but same for all batched vertices
1418fdabdddSMark Adams   PetscInt       mat_offset[LANDAU_MAX_GRIDS+1]; // for each grid, but same for all batched vertices
142cb25d741SMark Adams   // batching
143cb25d741SMark Adams   PetscBool      jacobian_field_major_order; // this could be a type but lets not get pedantic
144cb25d741SMark Adams   VecScatter     plex_batch;
145cb25d741SMark Adams   Vec            work_vec;
146cb25d741SMark Adams   IS             batch_is;
147cb25d741SMark Adams   PetscErrorCode (*seqaij_mult)(Mat,Vec,Vec);
148cb25d741SMark Adams   PetscErrorCode (*seqaij_multtranspose)(Mat,Vec,Vec);
149cb25d741SMark Adams   PetscErrorCode (*seqaij_solve)(Mat,Vec,Vec);
150cb25d741SMark Adams   PetscErrorCode (*seqaij_getdiagonal)(Mat,Vec);
151bfc784b7SMark Adams   /* COO */
152aab769cbSMark Adams   PetscBool        coo_assembly;
153e0eea495SMark   /* cache */
154e0eea495SMark   Mat              J;
155e0eea495SMark   Mat              M;
156e0eea495SMark   Vec              X;
157e0eea495SMark   /* derived type */
158e0eea495SMark   void             *data;
159e0eea495SMark   /* computing */
160e0eea495SMark   LandauDeviceType deviceType;
1618a6f2e61SMark Adams   DM               pack;
1628a6f2e61SMark Adams   DM               plex[LANDAU_MAX_GRIDS];
1638fdabdddSMark Adams   LandauStaticData SData_d; /* static geometric data on device */
16454545eeeSMark Adams   /* diagnostics */
16554545eeeSMark Adams   PetscInt         verbose;
16654545eeeSMark Adams   PetscLogEvent    events[20];
167c751c0a2SMark Adams   PetscLogStage    stage;
1688fdabdddSMark Adams   PetscBool        aux_bool;  /* helper */
1698fdabdddSMark Adams   PetscInt         batch_sz;
1708fdabdddSMark Adams   PetscInt         batch_view_idx;
171e0eea495SMark } LandauCtx;
172e0eea495SMark 
1738fdabdddSMark Adams #define LANDAU_SPECIES_MAJOR
1748fdabdddSMark Adams #if !defined(LANDAU_SPECIES_MAJOR)
1758fdabdddSMark Adams #define LAND_PACK_IDX(_b,_g) (_b*ctx->num_grids + _g)
1768fdabdddSMark Adams #define LAND_MOFFSET(_b,_g,_nbch,_ngrid,_mat_off) (_b*_mat_off[_ngrid] + _mat_off[_g])
1778fdabdddSMark Adams #else
1788fdabdddSMark Adams #define LAND_PACK_IDX(_b,_g) (_g*ctx->batch_sz + _b)
1798fdabdddSMark Adams #define LAND_MOFFSET(_b,_g,_nbch,_ngrid,_mat_off) (_nbch*_mat_off[_g] + _b*(_mat_off[_g+1] - _mat_off[_g]))
1808fdabdddSMark Adams #endif
1818fdabdddSMark Adams 
182a587d139SMark typedef struct {
183a587d139SMark   PetscReal scale;
184bfc784b7SMark Adams   LandauIdx gid;   // Landau matrix index (<10,000)
185a587d139SMark } pointInterpolationP4est;
186a587d139SMark typedef struct _lP4estVertexMaps {
187a587d139SMark   LandauIdx                (*gIdx)[LANDAU_MAX_SPECIES][LANDAU_MAX_NQ]; // #elems *  LANDAU_MAX_NQ (spoof for max , Nb) on device,
188a587d139SMark   LandauIdx                num_elements;
189a587d139SMark   LandauIdx                num_reduced;
190a587d139SMark   LandauIdx                num_face;  // (Q or Q^2 for 3D)
191a587d139SMark   LandauDeviceType         deviceType;
192a587d139SMark   PetscInt                 Nf;
193a587d139SMark   pointInterpolationP4est (*c_maps)[LANDAU_MAX_Q_FACE];
1948a6f2e61SMark Adams   struct _lP4estVertexMaps*d_self;
195a587d139SMark   void                    *vp1,*vp2,*vp3;
1968a6f2e61SMark Adams   PetscInt                numgrids;
197a587d139SMark } P4estVertexMaps;
198a587d139SMark 
199e0eea495SMark PETSC_EXTERN PetscErrorCode LandauCreateColoring(Mat, DM, PetscContainer *);
200e0eea495SMark #if defined(PETSC_HAVE_CUDA)
201a82411e9SMark Adams PETSC_EXTERN PetscErrorCode LandauCUDAJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscScalar[],
202d2319daeSMark Adams                                                const LandauStaticData *, const PetscReal, const PetscLogEvent[],const PetscInt[], const PetscInt[], Mat[], Mat);
2038a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauCUDACreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt);
2048a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauCUDADestroyMatMaps(P4estVertexMaps *, PetscInt);
2058fdabdddSMark Adams PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal [], PetscReal [], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[],
2068a6f2e61SMark Adams                                                     PetscReal[], LandauStaticData *);
2078a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataClear(LandauStaticData *);
208e0eea495SMark #endif
209e0eea495SMark #if defined(PETSC_HAVE_KOKKOS)
210a82411e9SMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[],  const PetscScalar[],
211d2319daeSMark Adams                                                  const LandauStaticData *, const PetscReal, const PetscLogEvent[], const PetscInt[], const PetscInt[], Mat[], Mat);
2128a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosCreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt);
2138a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosDestroyMatMaps(P4estVertexMaps *, PetscInt);
2148fdabdddSMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal [], PetscReal [], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[],
2158a6f2e61SMark Adams                                                       PetscReal[], LandauStaticData *);
2168a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataClear(LandauStaticData*);
217e0eea495SMark #endif
218e0eea495SMark 
219e0eea495SMark #endif /* PETSCLANDAU_H */
220