1e0eea495SMark #if !defined(PETSCLANDAU_H) 2e0eea495SMark #define PETSCLANDAU_H 3e0eea495SMark 4e0eea495SMark #include <petscdmplex.h> /*I "petscdmplex.h" I*/ 5e0eea495SMark #include <petscts.h> 6e0eea495SMark 7e0eea495SMark PETSC_EXTERN PetscErrorCode LandauPrintNorms(Vec, PetscInt); 8e0eea495SMark PETSC_EXTERN PetscErrorCode LandauCreateVelocitySpace(MPI_Comm,PetscInt,const char[],Vec*,Mat*,DM*); 9e0eea495SMark PETSC_EXTERN PetscErrorCode LandauDestroyVelocitySpace(DM*); 10*8a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauAddMaxwellians(DM, Vec, PetscReal, PetscReal[], PetscReal[], PetscInt, void *); 11e0eea495SMark PETSC_EXTERN PetscErrorCode LandauCreateMassMatrix(DM dm, Mat *Amat); 12e0eea495SMark PETSC_EXTERN PetscErrorCode LandauIFunction(TS, PetscReal,Vec,Vec,Vec,void *); 13e0eea495SMark PETSC_EXTERN PetscErrorCode LandauIJacobian(TS, PetscReal,Vec,Vec,PetscReal,Mat,Mat,void *); 14e0eea495SMark 15e0eea495SMark /* the Fokker-Planck-Landau context */ 16e0eea495SMark #if !defined(LANDAU_DIM) 17e0eea495SMark #define LANDAU_DIM 2 18e0eea495SMark #endif 19a587d139SMark 20e0eea495SMark #if !defined(LANDAU_MAX_SPECIES) 2152cdd6eaSMark #if LANDAU_DIM==2 22e0eea495SMark #define LANDAU_MAX_SPECIES 10 23*8a6f2e61SMark Adams #define LANDAU_MAX_GRIDS 3 2452cdd6eaSMark #else 2552cdd6eaSMark #define LANDAU_MAX_SPECIES 3 26*8a6f2e61SMark Adams #define LANDAU_MAX_GRIDS 2 2752cdd6eaSMark #endif 28*8a6f2e61SMark Adams #else 29*8a6f2e61SMark Adams #define LANDAU_MAX_GRIDS 3 30e0eea495SMark #endif 31a587d139SMark 32a587d139SMark #if !defined(LANDAU_MAX_Q) 33a587d139SMark #if defined(LANDAU_MAX_NQ) 34a587d139SMark #error"LANDAU_MAX_NQ but not LANDAU_MAX_Q. Use -DLANDAU_MAX_Q=4 for Q3 elements" 35a587d139SMark #endif 3652cdd6eaSMark #if LANDAU_DIM==2 37a587d139SMark #define LANDAU_MAX_Q 5 38e0eea495SMark #else 39a587d139SMark #define LANDAU_MAX_Q 3 40a587d139SMark #endif 41a587d139SMark #else 42a587d139SMark #undef LANDAU_MAX_NQ 43e0eea495SMark #endif 44a587d139SMark 45a587d139SMark #if LANDAU_DIM==2 46930e68a5SMark Adams #define LANDAU_MAX_Q_FACE LANDAU_MAX_Q 47a587d139SMark #define LANDAU_MAX_NQ (LANDAU_MAX_Q*LANDAU_MAX_Q) 48a587d139SMark #else 49930e68a5SMark Adams #define LANDAU_MAX_Q_FACE (LANDAU_MAX_Q*LANDAU_MAX_Q) 50a587d139SMark #define LANDAU_MAX_NQ (LANDAU_MAX_Q*LANDAU_MAX_Q*LANDAU_MAX_Q) 51a587d139SMark #endif 52a587d139SMark 53e0eea495SMark typedef enum {LANDAU_CUDA, LANDAU_KOKKOS, LANDAU_CPU} LandauDeviceType; 54e8d2b73aSMark Adams 55*8a6f2e61SMark Adams // static data 56e8d2b73aSMark Adams typedef struct { 57e8d2b73aSMark Adams void *invJ; // nip*dim*dim 58e8d2b73aSMark Adams void *D; // nq*nb*dim 59e8d2b73aSMark Adams void *B; // nq*nb 60e8d2b73aSMark Adams void *alpha; // ns 61e8d2b73aSMark Adams void *beta; // ns 62e8d2b73aSMark Adams void *invMass; // ns 63e8d2b73aSMark Adams void *w; // nip 64e8d2b73aSMark Adams void *x; // nip 65e8d2b73aSMark Adams void *y; // nip 66e8d2b73aSMark Adams void *z; // nip 67e8d2b73aSMark Adams void *Eq_m; // ns - dynamic 68e8d2b73aSMark Adams void *f; // nip*Nf - dynamic (IP) 69e8d2b73aSMark Adams void *dfdx; // nip*Nf - dynamic (IP) 70e8d2b73aSMark Adams void *dfdy; // nip*Nf - dynamic (IP) 71e8d2b73aSMark Adams void *dfdz; // nip*Nf - dynamic (IP) 72e8d2b73aSMark Adams int dim_,ns_,nip_,nq_,nb_; 73*8a6f2e61SMark Adams void *NCells; 74*8a6f2e61SMark Adams void *species_offset; 75*8a6f2e61SMark Adams void *mat_offset; 76*8a6f2e61SMark Adams void *elem_offset; 77*8a6f2e61SMark Adams void *ip_offset; 78*8a6f2e61SMark Adams void *ipf_offset; 79*8a6f2e61SMark Adams void *ipfdf_data; 80*8a6f2e61SMark Adams void *maps; 81*8a6f2e61SMark Adams } LandauStaticData; 82e8d2b73aSMark Adams 83e0eea495SMark typedef struct { 84e0eea495SMark PetscBool interpolate; /* Generate intermediate mesh elements */ 85a587d139SMark PetscBool gpu_assembly; 86*8a6f2e61SMark Adams MPI_Comm comm; /* global communicator to use for errors and diagnostics */ 87*8a6f2e61SMark Adams double times[1]; 88*8a6f2e61SMark Adams PetscBool initialized; 89*8a6f2e61SMark Adams PetscBool use_matrix_mass; 90*8a6f2e61SMark Adams /* FE */ 91e0eea495SMark PetscFE fe[LANDAU_MAX_SPECIES]; 92e0eea495SMark /* geometry */ 93*8a6f2e61SMark Adams PetscReal radius[LANDAU_MAX_GRIDS]; 94*8a6f2e61SMark Adams PetscReal re_radius; /* RE: radius of refinement along v_perp=0, z>0 */ 95*8a6f2e61SMark Adams PetscReal vperp0_radius1; /* RE: radius of refinement along v_perp=0 */ 96*8a6f2e61SMark Adams PetscReal vperp0_radius2; /* RE: radius of refinement along v_perp=0 after origin AMR refinement */ 97*8a6f2e61SMark Adams PetscBool sphere; // not used 98*8a6f2e61SMark Adams PetscBool inflate; // not used 99*8a6f2e61SMark Adams PetscReal i_radius[LANDAU_MAX_GRIDS]; // not used 100*8a6f2e61SMark Adams PetscReal e_radius; // not used 101*8a6f2e61SMark Adams PetscInt num_sections; // not used 102*8a6f2e61SMark Adams /* AMR */ 103*8a6f2e61SMark Adams PetscBool use_p4est; 104*8a6f2e61SMark Adams PetscInt numRERefine; /* RE: refinement along v_perp=0, z > 0 */ 105*8a6f2e61SMark Adams PetscInt nZRefine1; /* RE: origin refinement after v_perp=0 refinement */ 106*8a6f2e61SMark Adams PetscInt nZRefine2; /* RE: origin refinement after origin AMR refinement */ 107*8a6f2e61SMark Adams PetscInt numAMRRefine[LANDAU_MAX_GRIDS]; /* normal AMR - refine from origin */ 108*8a6f2e61SMark Adams PetscInt postAMRRefine[LANDAU_MAX_GRIDS]; /* uniform refinement of AMR */ 109*8a6f2e61SMark Adams /* relativistic */ 110cefb98e8SMark Adams PetscBool use_energy_tensor_trick; 111cefb98e8SMark Adams PetscBool use_relativistic_corrections; 112e0eea495SMark /* physics */ 113e0eea495SMark PetscReal thermal_temps[LANDAU_MAX_SPECIES]; 114e0eea495SMark PetscReal masses[LANDAU_MAX_SPECIES]; /* mass of each species */ 115e0eea495SMark PetscReal charges[LANDAU_MAX_SPECIES]; /* charge of each species */ 116e0eea495SMark PetscReal n[LANDAU_MAX_SPECIES]; /* number density of each species */ 117e0eea495SMark PetscReal m_0; /* reference mass */ 118e0eea495SMark PetscReal v_0; /* reference velocity */ 119e0eea495SMark PetscReal n_0; /* reference number density */ 120e0eea495SMark PetscReal t_0; /* reference time */ 121e0eea495SMark PetscReal Ez; 122e0eea495SMark PetscReal epsilon0; 123e0eea495SMark PetscReal k; 124e0eea495SMark PetscReal lnLam; 125*8a6f2e61SMark Adams PetscReal electronShift; 126e0eea495SMark PetscInt num_species; 127*8a6f2e61SMark Adams PetscInt num_grids; 128*8a6f2e61SMark Adams PetscInt species_offset[LANDAU_MAX_GRIDS+1]; 129*8a6f2e61SMark Adams PetscInt mat_offset[LANDAU_MAX_GRIDS+1]; 130e0eea495SMark /* cache */ 131e0eea495SMark Mat J; 132e0eea495SMark Mat M; 133e0eea495SMark Vec X; 134e0eea495SMark /* derived type */ 135e0eea495SMark void *data; 136e0eea495SMark PetscBool aux_bool; /* helper */ 137e0eea495SMark /* computing */ 138e0eea495SMark LandauDeviceType deviceType; 139bddcd29dSMark Adams PetscInt subThreadBlockSize; 140bddcd29dSMark Adams PetscInt numConcurrency; /* number of SMs in Cuda to use */ 141*8a6f2e61SMark Adams DM pack; 142*8a6f2e61SMark Adams DM plex[LANDAU_MAX_GRIDS]; 143*8a6f2e61SMark Adams LandauStaticData SData_d; /* static geometric data on device, this holds host pointers */ 14454545eeeSMark Adams /* diagnostics */ 14554545eeeSMark Adams PetscInt verbose; 14654545eeeSMark Adams PetscLogEvent events[20]; 147e0eea495SMark } LandauCtx; 148e0eea495SMark 149a587d139SMark typedef int LandauIdx; 150a587d139SMark typedef struct { 151a587d139SMark PetscReal scale; 152a587d139SMark LandauIdx gid; // Lanadu matrix index (<10,000) 153a587d139SMark } pointInterpolationP4est; 154a587d139SMark typedef struct _lP4estVertexMaps { 155a587d139SMark LandauIdx (*gIdx)[LANDAU_MAX_SPECIES][LANDAU_MAX_NQ]; // #elems * LANDAU_MAX_NQ (spoof for max , Nb) on device, 156a587d139SMark LandauIdx num_elements; 157a587d139SMark LandauIdx num_reduced; 158a587d139SMark LandauIdx num_face; // (Q or Q^2 for 3D) 159a587d139SMark LandauDeviceType deviceType; 160a587d139SMark PetscInt Nf; 161a587d139SMark PetscInt Nq; 162a587d139SMark pointInterpolationP4est (*c_maps)[LANDAU_MAX_Q_FACE]; 163*8a6f2e61SMark Adams struct _lP4estVertexMaps*d_self; 164a587d139SMark void *vp1,*vp2,*vp3; 165*8a6f2e61SMark Adams PetscInt numgrids; 166a587d139SMark } P4estVertexMaps; 167a587d139SMark 168e0eea495SMark PETSC_EXTERN PetscErrorCode LandauCreateColoring(Mat, DM, PetscContainer *); 169e0eea495SMark #if defined(PETSC_HAVE_CUDA) 170*8a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauCUDAJacobian(DM[], const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscInt, const PetscScalar[], 171*8a6f2e61SMark Adams const LandauStaticData *, const PetscInt, const PetscReal, const PetscLogEvent[],const PetscInt[], const PetscInt[], Mat[], Mat); 172*8a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauCUDACreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt); 173*8a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauCUDADestroyMatMaps(P4estVertexMaps *, PetscInt); 174*8a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataSet(DM, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal [], PetscReal [], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], 175*8a6f2e61SMark Adams PetscReal[], LandauStaticData *); 176*8a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataClear(LandauStaticData *); 177e0eea495SMark #endif 178e0eea495SMark #if defined(PETSC_HAVE_KOKKOS) 179*8a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosJacobian(DM[], const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscInt, const PetscScalar[], 180*8a6f2e61SMark Adams const LandauStaticData *, const PetscInt, const PetscReal, const PetscLogEvent[], const PetscInt[], const PetscInt[], Mat[], Mat); 181*8a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosCreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt); 182*8a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosDestroyMatMaps(P4estVertexMaps *, PetscInt); 183*8a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataSet(DM, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal [], PetscReal [], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], 184*8a6f2e61SMark Adams PetscReal[], LandauStaticData *); 185*8a6f2e61SMark Adams PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataClear(LandauStaticData*); 186e0eea495SMark #endif 187e0eea495SMark 188e0eea495SMark #endif /* PETSCLANDAU_H */ 189