xref: /libCEED/backends/hip-ref/kernels/hip-ref-vector.hip.cpp (revision b7453713e95c1c6eb59ce174cbcb87227e92884e)
13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
30d0321e0SJeremy L Thompson //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
50d0321e0SJeremy L Thompson //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
70d0321e0SJeremy L Thompson 
849aac155SJeremy L Thompson #include <ceed.h>
90d0321e0SJeremy L Thompson #include <hip/hip_runtime.h>
100d0321e0SJeremy L Thompson 
110d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
120d0321e0SJeremy L Thompson // Kernel for set value on device
130d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
149330daecSnbeams __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedSize size, CeedScalar val) {
15*b7453713SJeremy L Thompson   CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
16*b7453713SJeremy L Thompson 
17*b7453713SJeremy L Thompson   if (index >= size) return;
18*b7453713SJeremy L Thompson   vec[index] = val;
190d0321e0SJeremy L Thompson }
200d0321e0SJeremy L Thompson 
210d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
220d0321e0SJeremy L Thompson // Set value on device memory
230d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
249330daecSnbeams extern "C" int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedSize length, CeedScalar val) {
25*b7453713SJeremy L Thompson   const int      block_size = 512;
26*b7453713SJeremy L Thompson   const CeedSize vec_size   = length;
27*b7453713SJeremy L Thompson   int            grid_size  = vec_size / block_size;
280d0321e0SJeremy L Thompson 
29*b7453713SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
30*b7453713SJeremy L Thompson   hipLaunchKernelGGL(setValueK, dim3(grid_size), dim3(block_size), 0, 0, d_array, length, val);
310d0321e0SJeremy L Thompson   return 0;
320d0321e0SJeremy L Thompson }
330d0321e0SJeremy L Thompson 
340d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
350d0321e0SJeremy L Thompson // Kernel for taking reciprocal
360d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
379330daecSnbeams __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedSize size) {
38*b7453713SJeremy L Thompson   CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
39*b7453713SJeremy L Thompson 
40*b7453713SJeremy L Thompson   if (index >= size) return;
41*b7453713SJeremy L Thompson   if (fabs(vec[index]) > 1E-16) vec[index] = 1. / vec[index];
420d0321e0SJeremy L Thompson }
430d0321e0SJeremy L Thompson 
440d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
450d0321e0SJeremy L Thompson // Take vector reciprocal in device memory
460d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
479330daecSnbeams extern "C" int CeedDeviceReciprocal_Hip(CeedScalar *d_array, CeedSize length) {
48*b7453713SJeremy L Thompson   const int      block_size = 512;
49*b7453713SJeremy L Thompson   const CeedSize vec_size   = length;
50*b7453713SJeremy L Thompson   int            grid_size  = vec_size / block_size;
510d0321e0SJeremy L Thompson 
52*b7453713SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
53*b7453713SJeremy L Thompson   hipLaunchKernelGGL(rcpValueK, dim3(grid_size), dim3(block_size), 0, 0, d_array, length);
540d0321e0SJeremy L Thompson   return 0;
550d0321e0SJeremy L Thompson }
560d0321e0SJeremy L Thompson 
570d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
580d0321e0SJeremy L Thompson // Kernel for scale
590d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
609330daecSnbeams __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedSize size) {
61*b7453713SJeremy L Thompson   CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
62*b7453713SJeremy L Thompson 
63*b7453713SJeremy L Thompson   if (index >= size) return;
64*b7453713SJeremy L Thompson   x[index] *= alpha;
650d0321e0SJeremy L Thompson }
660d0321e0SJeremy L Thompson 
670d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
680d0321e0SJeremy L Thompson // Compute x = alpha x on device
690d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
709330daecSnbeams extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
71*b7453713SJeremy L Thompson   const int      block_size = 512;
72*b7453713SJeremy L Thompson   const CeedSize vec_size   = length;
73*b7453713SJeremy L Thompson   int            grid_size  = vec_size / block_size;
740d0321e0SJeremy L Thompson 
75*b7453713SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
76*b7453713SJeremy L Thompson   hipLaunchKernelGGL(scaleValueK, dim3(grid_size), dim3(block_size), 0, 0, x_array, alpha, length);
770d0321e0SJeremy L Thompson   return 0;
780d0321e0SJeremy L Thompson }
790d0321e0SJeremy L Thompson 
800d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
810d0321e0SJeremy L Thompson // Kernel for axpy
820d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
839330daecSnbeams __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedSize size) {
84*b7453713SJeremy L Thompson   CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
85*b7453713SJeremy L Thompson 
86*b7453713SJeremy L Thompson   if (index >= size) return;
87*b7453713SJeremy L Thompson   y[index] += alpha * x[index];
880d0321e0SJeremy L Thompson }
890d0321e0SJeremy L Thompson 
900d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
910d0321e0SJeremy L Thompson // Compute y = alpha x + y on device
920d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
939330daecSnbeams extern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
94*b7453713SJeremy L Thompson   const int      block_size = 512;
95*b7453713SJeremy L Thompson   const CeedSize vec_size   = length;
96*b7453713SJeremy L Thompson   int            grid_size  = vec_size / block_size;
970d0321e0SJeremy L Thompson 
98*b7453713SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
99*b7453713SJeremy L Thompson   hipLaunchKernelGGL(axpyValueK, dim3(grid_size), dim3(block_size), 0, 0, y_array, alpha, x_array, length);
1000d0321e0SJeremy L Thompson   return 0;
1010d0321e0SJeremy L Thompson }
1020d0321e0SJeremy L Thompson 
1030d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1045fb68f37SKaren (Ren) Stengel // Kernel for axpby
1055fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
1069330daecSnbeams __global__ static void axpbyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar beta, CeedScalar *__restrict__ x, CeedSize size) {
107*b7453713SJeremy L Thompson   CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
108*b7453713SJeremy L Thompson 
109*b7453713SJeremy L Thompson   if (index >= size) return;
110*b7453713SJeremy L Thompson   y[index] = beta * y[index];
111*b7453713SJeremy L Thompson   y[index] += alpha * x[index];
1125fb68f37SKaren (Ren) Stengel }
1135fb68f37SKaren (Ren) Stengel 
1145fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
1155fb68f37SKaren (Ren) Stengel // Compute y = alpha x + beta y on device
1165fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
1179330daecSnbeams extern "C" int CeedDeviceAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length) {
118*b7453713SJeremy L Thompson   const int      block_size = 512;
119*b7453713SJeremy L Thompson   const CeedSize vec_size   = length;
120*b7453713SJeremy L Thompson   int            grid_size  = vec_size / block_size;
1215fb68f37SKaren (Ren) Stengel 
122*b7453713SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
123*b7453713SJeremy L Thompson   hipLaunchKernelGGL(axpbyValueK, dim3(grid_size), dim3(block_size), 0, 0, y_array, alpha, beta, x_array, length);
1245fb68f37SKaren (Ren) Stengel   return 0;
1255fb68f37SKaren (Ren) Stengel }
1265fb68f37SKaren (Ren) Stengel 
1275fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
1280d0321e0SJeremy L Thompson // Kernel for pointwise mult
1290d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1309330daecSnbeams __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedSize size) {
131*b7453713SJeremy L Thompson   CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;
132*b7453713SJeremy L Thompson 
133*b7453713SJeremy L Thompson   if (index >= size) return;
134*b7453713SJeremy L Thompson   w[index] = x[index] * y[index];
1350d0321e0SJeremy L Thompson }
1360d0321e0SJeremy L Thompson 
1370d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1380d0321e0SJeremy L Thompson // Compute the pointwise multiplication w = x .* y on device
1390d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1409330daecSnbeams extern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
141*b7453713SJeremy L Thompson   const int      block_size = 512;
142*b7453713SJeremy L Thompson   const CeedSize vec_size   = length;
143*b7453713SJeremy L Thompson   int            grid_size  = vec_size / block_size;
1440d0321e0SJeremy L Thompson 
145*b7453713SJeremy L Thompson   if (block_size * grid_size < vec_size) grid_size += 1;
146*b7453713SJeremy L Thompson   hipLaunchKernelGGL(pointwiseMultValueK, dim3(grid_size), dim3(block_size), 0, 0, w_array, x_array, y_array, length);
1470d0321e0SJeremy L Thompson   return 0;
1480d0321e0SJeremy L Thompson }
1492a86cc9dSSebastian Grimberg 
1502a86cc9dSSebastian Grimberg //------------------------------------------------------------------------------
151