149ed4312SSebastian Grimberg // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other 249ed4312SSebastian Grimberg // CEED contributors. All Rights Reserved. See the top-level LICENSE and NOTICE 349ed4312SSebastian Grimberg // files for details. 449ed4312SSebastian Grimberg // 549ed4312SSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause 649ed4312SSebastian Grimberg // 749ed4312SSebastian Grimberg // This file is part of CEED: http://github.com/ceed 849ed4312SSebastian Grimberg 949ed4312SSebastian Grimberg #include <ceed/ceed.h> 1049ed4312SSebastian Grimberg #include <sycl/sycl.hpp> 1149ed4312SSebastian Grimberg 1249ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 1349ed4312SSebastian Grimberg // Kernel for set value on device 1449ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 1549ed4312SSebastian Grimberg __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedInt size, CeedScalar val) { 16*dd64fc84SJeremy L Thompson int index = threadIdx.x + blockDim.x * blockIdx.x; 17*dd64fc84SJeremy L Thompson 18*dd64fc84SJeremy L Thompson if (index >= size) return; 19*dd64fc84SJeremy L Thompson vec[index] = val; 2049ed4312SSebastian Grimberg } 2149ed4312SSebastian Grimberg 2249ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 2349ed4312SSebastian Grimberg // Set value on device memory 2449ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 2549ed4312SSebastian Grimberg extern "C" int CeedDeviceSetValue_Sycl(CeedScalar *d_array, CeedInt length, CeedScalar val) { 26*dd64fc84SJeremy L Thompson const int block_size = 512; 27*dd64fc84SJeremy L Thompson const int vec_size = length; 28*dd64fc84SJeremy L Thompson int grid_size = vec_size / block_size; 2949ed4312SSebastian Grimberg 30*dd64fc84SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 31*dd64fc84SJeremy L Thompson setValueK<<<grid_size, block_size>>>(d_array, length, val); 3249ed4312SSebastian Grimberg return 0; 3349ed4312SSebastian Grimberg } 3449ed4312SSebastian Grimberg 3549ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 3649ed4312SSebastian Grimberg // Kernel for taking reciprocal 3749ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 3849ed4312SSebastian Grimberg __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedInt size) { 39*dd64fc84SJeremy L Thompson int index = threadIdx.x + blockDim.x * blockIdx.x; 40*dd64fc84SJeremy L Thompson 41*dd64fc84SJeremy L Thompson if (index >= size) return; 42*dd64fc84SJeremy L Thompson if (fabs(vec[index]) > 1E-16) vec[index] = 1. / vec[index]; 4349ed4312SSebastian Grimberg } 4449ed4312SSebastian Grimberg 4549ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 4649ed4312SSebastian Grimberg // Take vector reciprocal in device memory 4749ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 4849ed4312SSebastian Grimberg extern "C" int CeedDeviceReciprocal_Sycl(CeedScalar *d_array, CeedInt length) { 49*dd64fc84SJeremy L Thompson const int block_size = 512; 50*dd64fc84SJeremy L Thompson const int vec_size = length; 51*dd64fc84SJeremy L Thompson int grid_size = vec_size / block_size; 5249ed4312SSebastian Grimberg 53*dd64fc84SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 54*dd64fc84SJeremy L Thompson rcpValueK<<<grid_size, block_size>>>(d_array, length); 5549ed4312SSebastian Grimberg return 0; 5649ed4312SSebastian Grimberg } 5749ed4312SSebastian Grimberg 5849ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 5949ed4312SSebastian Grimberg // Kernel for scale 6049ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 6149ed4312SSebastian Grimberg __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedInt size) { 62*dd64fc84SJeremy L Thompson int index = threadIdx.x + blockDim.x * blockIdx.x; 63*dd64fc84SJeremy L Thompson 64*dd64fc84SJeremy L Thompson if (index >= size) return; 65*dd64fc84SJeremy L Thompson x[index] *= alpha; 6649ed4312SSebastian Grimberg } 6749ed4312SSebastian Grimberg 6849ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 6949ed4312SSebastian Grimberg // Compute x = alpha x on device 7049ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 7149ed4312SSebastian Grimberg extern "C" int CeedDeviceScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedInt length) { 72*dd64fc84SJeremy L Thompson const int block_size = 512; 73*dd64fc84SJeremy L Thompson const int vec_size = length; 74*dd64fc84SJeremy L Thompson int grid_size = vec_size / block_size; 7549ed4312SSebastian Grimberg 76*dd64fc84SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 77*dd64fc84SJeremy L Thompson scaleValueK<<<grid_size, block_size>>>(x_array, alpha, length); 7849ed4312SSebastian Grimberg return 0; 7949ed4312SSebastian Grimberg } 8049ed4312SSebastian Grimberg 8149ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 8249ed4312SSebastian Grimberg // Kernel for axpy 8349ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 8449ed4312SSebastian Grimberg __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedInt size) { 85*dd64fc84SJeremy L Thompson int index = threadIdx.x + blockDim.x * blockIdx.x; 86*dd64fc84SJeremy L Thompson if (index >= size) return; 87*dd64fc84SJeremy L Thompson y[index] += alpha * x[index]; 8849ed4312SSebastian Grimberg } 8949ed4312SSebastian Grimberg 9049ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 9149ed4312SSebastian Grimberg // Compute y = alpha x + y on device 9249ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 9349ed4312SSebastian Grimberg extern "C" int CeedDeviceAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedInt length) { 94*dd64fc84SJeremy L Thompson const int block_size = 512; 95*dd64fc84SJeremy L Thompson const int vec_size = length; 96*dd64fc84SJeremy L Thompson int grid_size = vec_size / block_size; 9749ed4312SSebastian Grimberg 98*dd64fc84SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 99*dd64fc84SJeremy L Thompson axpyValueK<<<grid_size, block_size>>>(y_array, alpha, x_array, length); 10049ed4312SSebastian Grimberg return 0; 10149ed4312SSebastian Grimberg } 10249ed4312SSebastian Grimberg 10349ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 10449ed4312SSebastian Grimberg // Kernel for pointwise mult 10549ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 10649ed4312SSebastian Grimberg __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedInt size) { 107*dd64fc84SJeremy L Thompson int index = threadIdx.x + blockDim.x * blockIdx.x; 108*dd64fc84SJeremy L Thompson 109*dd64fc84SJeremy L Thompson if (index >= size) return; 110*dd64fc84SJeremy L Thompson w[index] = x[index] * y[index]; 11149ed4312SSebastian Grimberg } 11249ed4312SSebastian Grimberg 11349ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 11449ed4312SSebastian Grimberg // Compute the pointwise multiplication w = x .* y on device 11549ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 11649ed4312SSebastian Grimberg extern "C" int CeedDevicePointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedInt length) { 117*dd64fc84SJeremy L Thompson const int block_size = 512; 118*dd64fc84SJeremy L Thompson const int vec_size = length; 119*dd64fc84SJeremy L Thompson int grid_size = vec_size / block_size; 12049ed4312SSebastian Grimberg 121*dd64fc84SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 122*dd64fc84SJeremy L Thompson pointwiseMultValueK<<<grid_size, block_size>>>(w_array, x_array, y_array, length); 12349ed4312SSebastian Grimberg return 0; 12449ed4312SSebastian Grimberg } 12549ed4312SSebastian Grimberg 12649ed4312SSebastian Grimberg //------------------------------------------------------------------------------ 127