xref: /libCEED/backends/sycl-ref/kernels/sycl-ref-vector.cpp (revision dd64fc8452c2d35c954858232143719e6bb2e61d)
149ed4312SSebastian Grimberg // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other
249ed4312SSebastian Grimberg // CEED contributors. All Rights Reserved. See the top-level LICENSE and NOTICE
349ed4312SSebastian Grimberg // files for details.
449ed4312SSebastian Grimberg //
549ed4312SSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause
649ed4312SSebastian Grimberg //
749ed4312SSebastian Grimberg // This file is part of CEED:  http://github.com/ceed
849ed4312SSebastian Grimberg 
949ed4312SSebastian Grimberg #include <ceed/ceed.h>
1049ed4312SSebastian Grimberg #include <sycl/sycl.hpp>
1149ed4312SSebastian Grimberg 
1249ed4312SSebastian Grimberg //------------------------------------------------------------------------------
1349ed4312SSebastian Grimberg // Kernel for set value on device
1449ed4312SSebastian Grimberg //------------------------------------------------------------------------------
1549ed4312SSebastian Grimberg __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedInt size, CeedScalar val) {
16*dd64fc84SJeremy L Thompson   int index = threadIdx.x + blockDim.x * blockIdx.x;
17*dd64fc84SJeremy L Thompson 
18*dd64fc84SJeremy L Thompson   if (index >= size) return;
19*dd64fc84SJeremy L Thompson   vec[index] = val;
2049ed4312SSebastian Grimberg }
2149ed4312SSebastian Grimberg 
2249ed4312SSebastian Grimberg //------------------------------------------------------------------------------
2349ed4312SSebastian Grimberg // Set value on device memory
2449ed4312SSebastian Grimberg //------------------------------------------------------------------------------
2549ed4312SSebastian Grimberg extern "C" int CeedDeviceSetValue_Sycl(CeedScalar *d_array, CeedInt length, CeedScalar val) {
26*dd64fc84SJeremy L Thompson   const int block_size = 512;
27*dd64fc84SJeremy L Thompson   const int vec_size   = length;
28*dd64fc84SJeremy L Thompson   int       grid_size  = vec_size / block_size;
2949ed4312SSebastian Grimberg 
30*dd64fc84SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
31*dd64fc84SJeremy L Thompson   setValueK<<<grid_size, block_size>>>(d_array, length, val);
3249ed4312SSebastian Grimberg   return 0;
3349ed4312SSebastian Grimberg }
3449ed4312SSebastian Grimberg 
3549ed4312SSebastian Grimberg //------------------------------------------------------------------------------
3649ed4312SSebastian Grimberg // Kernel for taking reciprocal
3749ed4312SSebastian Grimberg //------------------------------------------------------------------------------
3849ed4312SSebastian Grimberg __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedInt size) {
39*dd64fc84SJeremy L Thompson   int index = threadIdx.x + blockDim.x * blockIdx.x;
40*dd64fc84SJeremy L Thompson 
41*dd64fc84SJeremy L Thompson   if (index >= size) return;
42*dd64fc84SJeremy L Thompson   if (fabs(vec[index]) > 1E-16) vec[index] = 1. / vec[index];
4349ed4312SSebastian Grimberg }
4449ed4312SSebastian Grimberg 
4549ed4312SSebastian Grimberg //------------------------------------------------------------------------------
4649ed4312SSebastian Grimberg // Take vector reciprocal in device memory
4749ed4312SSebastian Grimberg //------------------------------------------------------------------------------
4849ed4312SSebastian Grimberg extern "C" int CeedDeviceReciprocal_Sycl(CeedScalar *d_array, CeedInt length) {
49*dd64fc84SJeremy L Thompson   const int block_size = 512;
50*dd64fc84SJeremy L Thompson   const int vec_size   = length;
51*dd64fc84SJeremy L Thompson   int       grid_size  = vec_size / block_size;
5249ed4312SSebastian Grimberg 
53*dd64fc84SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
54*dd64fc84SJeremy L Thompson   rcpValueK<<<grid_size, block_size>>>(d_array, length);
5549ed4312SSebastian Grimberg   return 0;
5649ed4312SSebastian Grimberg }
5749ed4312SSebastian Grimberg 
5849ed4312SSebastian Grimberg //------------------------------------------------------------------------------
5949ed4312SSebastian Grimberg // Kernel for scale
6049ed4312SSebastian Grimberg //------------------------------------------------------------------------------
6149ed4312SSebastian Grimberg __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedInt size) {
62*dd64fc84SJeremy L Thompson   int index = threadIdx.x + blockDim.x * blockIdx.x;
63*dd64fc84SJeremy L Thompson 
64*dd64fc84SJeremy L Thompson   if (index >= size) return;
65*dd64fc84SJeremy L Thompson   x[index] *= alpha;
6649ed4312SSebastian Grimberg }
6749ed4312SSebastian Grimberg 
6849ed4312SSebastian Grimberg //------------------------------------------------------------------------------
6949ed4312SSebastian Grimberg // Compute x = alpha x on device
7049ed4312SSebastian Grimberg //------------------------------------------------------------------------------
7149ed4312SSebastian Grimberg extern "C" int CeedDeviceScale_Sycl(CeedScalar *x_array, CeedScalar alpha, CeedInt length) {
72*dd64fc84SJeremy L Thompson   const int block_size = 512;
73*dd64fc84SJeremy L Thompson   const int vec_size   = length;
74*dd64fc84SJeremy L Thompson   int       grid_size  = vec_size / block_size;
7549ed4312SSebastian Grimberg 
76*dd64fc84SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
77*dd64fc84SJeremy L Thompson   scaleValueK<<<grid_size, block_size>>>(x_array, alpha, length);
7849ed4312SSebastian Grimberg   return 0;
7949ed4312SSebastian Grimberg }
8049ed4312SSebastian Grimberg 
8149ed4312SSebastian Grimberg //------------------------------------------------------------------------------
8249ed4312SSebastian Grimberg // Kernel for axpy
8349ed4312SSebastian Grimberg //------------------------------------------------------------------------------
8449ed4312SSebastian Grimberg __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedInt size) {
85*dd64fc84SJeremy L Thompson   int index = threadIdx.x + blockDim.x * blockIdx.x;
86*dd64fc84SJeremy L Thompson   if (index >= size) return;
87*dd64fc84SJeremy L Thompson   y[index] += alpha * x[index];
8849ed4312SSebastian Grimberg }
8949ed4312SSebastian Grimberg 
9049ed4312SSebastian Grimberg //------------------------------------------------------------------------------
9149ed4312SSebastian Grimberg // Compute y = alpha x + y on device
9249ed4312SSebastian Grimberg //------------------------------------------------------------------------------
9349ed4312SSebastian Grimberg extern "C" int CeedDeviceAXPY_Sycl(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedInt length) {
94*dd64fc84SJeremy L Thompson   const int block_size = 512;
95*dd64fc84SJeremy L Thompson   const int vec_size   = length;
96*dd64fc84SJeremy L Thompson   int       grid_size  = vec_size / block_size;
9749ed4312SSebastian Grimberg 
98*dd64fc84SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
99*dd64fc84SJeremy L Thompson   axpyValueK<<<grid_size, block_size>>>(y_array, alpha, x_array, length);
10049ed4312SSebastian Grimberg   return 0;
10149ed4312SSebastian Grimberg }
10249ed4312SSebastian Grimberg 
10349ed4312SSebastian Grimberg //------------------------------------------------------------------------------
10449ed4312SSebastian Grimberg // Kernel for pointwise mult
10549ed4312SSebastian Grimberg //------------------------------------------------------------------------------
10649ed4312SSebastian Grimberg __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedInt size) {
107*dd64fc84SJeremy L Thompson   int index = threadIdx.x + blockDim.x * blockIdx.x;
108*dd64fc84SJeremy L Thompson 
109*dd64fc84SJeremy L Thompson   if (index >= size) return;
110*dd64fc84SJeremy L Thompson   w[index] = x[index] * y[index];
11149ed4312SSebastian Grimberg }
11249ed4312SSebastian Grimberg 
11349ed4312SSebastian Grimberg //------------------------------------------------------------------------------
11449ed4312SSebastian Grimberg // Compute the pointwise multiplication w = x .* y on device
11549ed4312SSebastian Grimberg //------------------------------------------------------------------------------
11649ed4312SSebastian Grimberg extern "C" int CeedDevicePointwiseMult_Sycl(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedInt length) {
117*dd64fc84SJeremy L Thompson   const int block_size = 512;
118*dd64fc84SJeremy L Thompson   const int vec_size   = length;
119*dd64fc84SJeremy L Thompson   int       grid_size  = vec_size / block_size;
12049ed4312SSebastian Grimberg 
121*dd64fc84SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
122*dd64fc84SJeremy L Thompson   pointwiseMultValueK<<<grid_size, block_size>>>(w_array, x_array, y_array, length);
12349ed4312SSebastian Grimberg   return 0;
12449ed4312SSebastian Grimberg }
12549ed4312SSebastian Grimberg 
12649ed4312SSebastian Grimberg //------------------------------------------------------------------------------
127