1*9ba83ac0SJeremy L Thompson // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other 249ed4312SSebastian Grimberg // CEED contributors. All Rights Reserved. See the top-level LICENSE and NOTICE 349ed4312SSebastian Grimberg // files for details. 449ed4312SSebastian Grimberg // 549ed4312SSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause 649ed4312SSebastian Grimberg // 749ed4312SSebastian Grimberg // This file is part of CEED: http://github.com/ceed 849ed4312SSebastian Grimberg 949ed4312SSebastian Grimberg #include <ceed/ceed.h> 1049ed4312SSebastian Grimberg #include <sycl/sycl.hpp> 1149ed4312SSebastian Grimberg 1249ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 1349ed4312SSebastian Grimberg // Kernel for set value on device 1449ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 1549ed4312SSebastian Grimberg __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedInt size, CeedScalar val) { 16dd64fc84SJeremy L Thompson int index = threadIdx.x + blockDim.x * blockIdx.x; 17dd64fc84SJeremy L Thompson 18dd64fc84SJeremy L Thompson if (index >= size) return; 19dd64fc84SJeremy L Thompson vec[index] = val; 2049ed4312SSebastian Grimberg } 2149ed4312SSebastian Grimberg 2249ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 2349ed4312SSebastian Grimberg // Set value on device memory 2449ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 2549ed4312SSebastian Grimberg extern "C" int CeedDeviceSetValue_Sycl(CeedScalar *d_array, CeedInt length, CeedScalar val) { 26dd64fc84SJeremy L Thompson const int block_size = 512; 27dd64fc84SJeremy L Thompson const int vec_size = length; 28dd64fc84SJeremy L Thompson int grid_size = vec_size / block_size; 2949ed4312SSebastian Grimberg 30dd64fc84SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 31dd64fc84SJeremy L Thompson setValueK<<<grid_size, block_size>>>(d_array, length, val); 3249ed4312SSebastian Grimberg return 0; 3349ed4312SSebastian Grimberg } 3449ed4312SSebastian Grimberg 3549ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 3649ed4312SSebastian Grimberg // Kernel for taking reciprocal 3749ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 3849ed4312SSebastian Grimberg __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedInt size) { 39dd64fc84SJeremy L Thompson int index = threadIdx.x + blockDim.x * blockIdx.x; 40dd64fc84SJeremy L Thompson 41dd64fc84SJeremy L Thompson if (index >= size) return; 42dd64fc84SJeremy L Thompson if (fabs(vec[index]) > 1E-16) vec[index] = 1. / vec[index]; 4349ed4312SSebastian Grimberg } 4449ed4312SSebastian Grimberg 4549ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 4649ed4312SSebastian Grimberg // Take vector reciprocal in device memory 4749ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 4849ed4312SSebastian Grimberg extern "C" int CeedDeviceReciprocal_Sycl(CeedScalar *d_array, CeedInt length) { 49dd64fc84SJeremy L Thompson const int block_size = 512; 50dd64fc84SJeremy L Thompson const int vec_size = length; 51dd64fc84SJeremy L Thompson int grid_size = vec_size / block_size; 5249ed4312SSebastian Grimberg 53dd64fc84SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 54dd64fc84SJeremy L Thompson rcpValueK<<<grid_size, block_size>>>(d_array, length); 5549ed4312SSebastian Grimberg return 0; 5649ed4312SSebastian Grimberg } 5749ed4312SSebastian Grimberg 5849ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 5949ed4312SSebastian Grimberg // Kernel for scale 6049ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 6149ed4312SSebastian Grimberg __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedInt size) { 62dd64fc84SJeremy L Thompson int index = threadIdx.x + blockDim.x * blockIdx.x; 63dd64fc84SJeremy L Thompson 64dd64fc84SJeremy L Thompson if (index >= size) return; 65dd64fc84SJeremy L Thompson x[index] *= alpha; 6649ed4312SSebastian Grimberg } 6749ed4312SSebastian Grimberg 6849ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 6949ed4312SSebastian Grimberg // Compute x = alpha x on device 7049ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 7149ed4312SSebastian Grimberg extern "C" int CeedDeviceScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedInt length) { 72dd64fc84SJeremy L Thompson const int block_size = 512; 73dd64fc84SJeremy L Thompson const int vec_size = length; 74dd64fc84SJeremy L Thompson int grid_size = vec_size / block_size; 7549ed4312SSebastian Grimberg 76dd64fc84SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 77dd64fc84SJeremy L Thompson scaleValueK<<<grid_size, block_size>>>(x_array, alpha, length); 7849ed4312SSebastian Grimberg return 0; 7949ed4312SSebastian Grimberg } 8049ed4312SSebastian Grimberg 8149ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 8249ed4312SSebastian Grimberg // Kernel for axpy 8349ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 8449ed4312SSebastian Grimberg __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedInt size) { 85dd64fc84SJeremy L Thompson int index = threadIdx.x + blockDim.x * blockIdx.x; 86dd64fc84SJeremy L Thompson if (index >= size) return; 87dd64fc84SJeremy L Thompson y[index] += alpha * x[index]; 8849ed4312SSebastian Grimberg } 8949ed4312SSebastian Grimberg 9049ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 9149ed4312SSebastian Grimberg // Compute y = alpha x + y on device 9249ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 9349ed4312SSebastian Grimberg extern "C" int CeedDeviceAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedInt length) { 94dd64fc84SJeremy L Thompson const int block_size = 512; 95dd64fc84SJeremy L Thompson const int vec_size = length; 96dd64fc84SJeremy L Thompson int grid_size = vec_size / block_size; 9749ed4312SSebastian Grimberg 98dd64fc84SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 99dd64fc84SJeremy L Thompson axpyValueK<<<grid_size, block_size>>>(y_array, alpha, x_array, length); 10049ed4312SSebastian Grimberg return 0; 10149ed4312SSebastian Grimberg } 10249ed4312SSebastian Grimberg 10349ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 10449ed4312SSebastian Grimberg // Kernel for pointwise mult 10549ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 10649ed4312SSebastian Grimberg __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedInt size) { 107dd64fc84SJeremy L Thompson int index = threadIdx.x + blockDim.x * blockIdx.x; 108dd64fc84SJeremy L Thompson 109dd64fc84SJeremy L Thompson if (index >= size) return; 110dd64fc84SJeremy L Thompson w[index] = x[index] * y[index]; 11149ed4312SSebastian Grimberg } 11249ed4312SSebastian Grimberg 11349ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 11449ed4312SSebastian Grimberg // Compute the pointwise multiplication w = x .* y on device 11549ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 11649ed4312SSebastian Grimberg extern "C" int CeedDevicePointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedInt length) { 117dd64fc84SJeremy L Thompson const int block_size = 512; 118dd64fc84SJeremy L Thompson const int vec_size = length; 119dd64fc84SJeremy L Thompson int grid_size = vec_size / block_size; 12049ed4312SSebastian Grimberg 121dd64fc84SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 122dd64fc84SJeremy L Thompson pointwiseMultValueK<<<grid_size, block_size>>>(w_array, x_array, y_array, length); 12349ed4312SSebastian Grimberg return 0; 12449ed4312SSebastian Grimberg } 12549ed4312SSebastian Grimberg 12649ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 127